From 80fb1026d0c91e961405e5f2ef456614a1d530d1 Mon Sep 17 00:00:00 2001 From: luccioman Date: Tue, 30 Jan 2018 21:00:18 +0100 Subject: [PATCH] Create recrawl requests with the relevant crawl profile. The default recrawl profile was already being used for the crawl stacker acceptance check, but the request entries themselves were still created with the "snippetGlobalText" profile. --- source/net/yacy/crawler/RecrawlBusyThread.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/crawler/RecrawlBusyThread.java b/source/net/yacy/crawler/RecrawlBusyThread.java index bf4d145d3..df7b87f0c 100644 --- a/source/net/yacy/crawler/RecrawlBusyThread.java +++ b/source/net/yacy/crawler/RecrawlBusyThread.java @@ -26,12 +26,14 @@ package net.yacy.crawler; import java.io.IOException; import java.net.MalformedURLException; import java.time.LocalDateTime; +import java.util.Date; import java.util.HashSet; import java.util.Set; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -194,7 +196,8 @@ public class RecrawlBusyThread extends AbstractBusyThread { final CrawlProfile profile = sb.crawler.defaultRecrawlJobProfile; for (final DigestURL url : this.urlstack) { - final Request request = sb.loader.request(url, true, true); + final Request request = new Request(ASCII.getBytes(this.sb.peers.mySeed().hash), url, null, "", + new Date(), profile.handle(), 0, profile.timezoneOffset()); String acceptedError = sb.crawlStacker.checkAcceptanceChangeable(url, profile, 0); if (!includefailed && acceptedError == null) { // skip check if failed docs to be included acceptedError = sb.crawlStacker.checkAcceptanceInitially(url, profile);