sixcooler 9 years ago
commit 41c9215174

@@ -35,6 +35,7 @@ import net.yacy.crawler.data.NoticedURL;
import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.workflow.AbstractBusyThread; import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
@@ -53,7 +54,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
private String currentQuery = CollectionSchema.fresh_date_dt.getSolrFieldName()+":[* TO NOW/DAY-1DAY]"; // current query private String currentQuery = CollectionSchema.fresh_date_dt.getSolrFieldName()+":[* TO NOW/DAY-1DAY]"; // current query
private boolean includefailed = false; // flag if docs with httpstatus_i <> 200 shall be recrawled private boolean includefailed = false; // flag if docs with httpstatus_i <> 200 shall be recrawled
private int chunkstart = 0; private int chunkstart = 0;
private int chunksize = 200; private final int chunksize;
final Switchboard sb; final Switchboard sb;
private final Set<DigestURL> urlstack; // buffer of urls to recrawl private final Set<DigestURL> urlstack; // buffer of urls to recrawl
public long urlsfound = 0; public long urlsfound = 0;
@@ -70,6 +71,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
// workaround to prevent solr exception on existing index (not fully reindexed) since intro of schema with docvalues // workaround to prevent solr exception on existing index (not fully reindexed) since intro of schema with docvalues
// org.apache.solr.core.SolrCore java.lang.IllegalStateException: unexpected docvalues type NONE for field 'load_date_dt' (expected=NUMERIC). Use UninvertingReader or index with docvalues. // org.apache.solr.core.SolrCore java.lang.IllegalStateException: unexpected docvalues type NONE for field 'load_date_dt' (expected=NUMERIC). Use UninvertingReader or index with docvalues.
solrSortBy = null; // CollectionSchema.load_date_dt.getSolrFieldName() + " asc"; solrSortBy = null; // CollectionSchema.load_date_dt.getSolrFieldName() + " asc";
this.chunksize = sb.getConfigInt(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 200);
} }
/** /**
@@ -146,8 +148,8 @@ public class RecrawlBusyThread extends AbstractBusyThread {
*/ */
@Override @Override
public boolean job() { public boolean job() {
// other crawls are running, do nothing // more than chunksize crawls are running, do nothing
if (sb.crawlQueues.coreCrawlJobSize() > 0) { if (sb.crawlQueues.coreCrawlJobSize() > this.chunksize) {
return false; return false;
} }

Loading…
Cancel
Save