diff --git a/htroot/Autocrawl_p.html b/htroot/Autocrawl_p.html index db77de051..3202a10d0 100644 --- a/htroot/Autocrawl_p.html +++ b/htroot/Autocrawl_p.html @@ -20,7 +20,7 @@ #(changed)#::
You need to restart for some settings to be applied
#(/changed)#
Enable Autocrawler:
-
Deep crawl every:
+
Deep crawl every Nth document:
Warning: if this is bigger than "Rows to fetch" only shallow crawls will run. @@ -47,4 +47,4 @@ - \ No newline at end of file + diff --git a/locales/master.lng.xlf b/locales/master.lng.xlf index 94e77d2eb..8deee4954 100644 --- a/locales/master.lng.xlf +++ b/locales/master.lng.xlf @@ -211,7 +211,7 @@ Enable Autocrawler: - Deep crawl every: + Deep crawl every Nth document: Warning: if this is bigger than "Rows to fetch" only shallow crawls will run. diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index ea9c3b167..38c8e8a2c 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -608,12 +608,19 @@ public class CrawlQueues { int i = 0; int deepRatio = Integer.parseInt(this.sb.getConfig(SwitchboardConstants.AUTOCRAWL_RATIO, "50")); for (SolrDocument doc: resp.getResults()) { + if (doc == null) { + continue; + } boolean deep = false; i++; if( i % deepRatio == 0 ){ deep = true; } DigestURL url; + if (doc.getFieldValue("url_protocol_s") == null || doc.getFieldValue("host_s") == null) { + //Skip this document if either of these values is null. + continue; + } final String u = doc.getFieldValue("url_protocol_s").toString() + "://" + doc.getFieldValue("host_s").toString(); try { url = new DigestURL(u);