- set the chunk size to 100 to match the maximum of the embedded Solr
- re-enable sorting (the case for which we switched it off should no longer occur)
- enable recrawling on remote-solr
pull/278/head
sgaebel 6 years ago
parent 8f58c1dcfa
commit 8d2e7262d9

@ -140,58 +140,54 @@ public class IndexReIndexMonitor_p {
if (recrawlbt == null || recrawlbt.shutdownInProgress()) { if (recrawlbt == null || recrawlbt.shutdownInProgress()) {
prop.put("recrawljobrunning_simulationResult", 0); prop.put("recrawljobrunning_simulationResult", 0);
prop.put("recrawljobrunning_error", 0); prop.put("recrawljobrunning_error", 0);
if(!sb.index.fulltext().connectedLocalSolr()) { if (post.containsKey("recrawlnow")) {
prop.put("recrawljobrunning_error", 1); // Re-crawl works only with an embedded local Solr index sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null,
} else { new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000);
if (post.containsKey("recrawlnow")) { recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME);
sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null,
new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000); /* store this call as an api call for easy scheduling possibility */
recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME); if(sb.tables != null) {
/* We avoid creating a duplicate of any already recorded API call with the same parameters */
/* store this call as an api call for easy scheduling possibility */ final Row lastExecutedCall = WorkTables
if(sb.tables != null) { .selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb);
/* We avoid creating a duplicate of any already recorded API call with the same parameters */ if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) {
final Row lastExecutedCall = WorkTables byte[] lastExecutedCallPk = lastExecutedCall.getPK();
.selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb); if (lastExecutedCallPk != null) {
if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) { post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk));
byte[] lastExecutedCallPk = lastExecutedCall.getPK(); }
if (lastExecutedCallPk != null) { }
post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk)); sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER,
} "Recrawl documents matching selection query : " + recrawlQuery);
} }
sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER, } else if(post.containsKey("simulateRecrawl")) {
"Recrawl documents matching selection query : " + recrawlQuery); final SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector();
} if (solrConnector != null && !solrConnector.isClosed()) {
} else if(post.containsKey("simulateRecrawl")) { try {
final SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector(); /* Ensure indexed data is up-to-date */
if (solrConnector != null && !solrConnector.isClosed()) { solrConnector.commit(true);
try { // query all or only httpstatus=200 depending on includefailed flag
/* Ensure indexed data is up-to-date */ final String finalQuery = RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc);
solrConnector.commit(true); final long count = solrConnector.getCountByQuery(finalQuery);
// query all or only httpstatus=200 depending on includefailed flag prop.put("recrawljobrunning_simulationResult", 1);
final String finalQuery = RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc); prop.put("recrawljobrunning_simulationResult_docCount", count);
final long count = solrConnector.getCountByQuery(finalQuery); if(count > 0) {
prop.put("recrawljobrunning_simulationResult", 1); /* Got some results : add a link to the related solr select URL for easily browsing results */
prop.put("recrawljobrunning_simulationResult_docCount", count); final int maxRows = 10;
if(count > 0) { final String solrSelectUrl = genLocalSolrSelectUrl(finalQuery, maxRows);
/* Got some results : add a link to the related solr select URL for easily browsing results */ prop.put("recrawljobrunning_simulationResult_showSelectLink", 1);
final int maxRows = 10; prop.put("recrawljobrunning_simulationResult_showSelectLink_rows", maxRows);
final String solrSelectUrl = genLocalSolrSelectUrl(finalQuery, maxRows); prop.put("recrawljobrunning_simulationResult_showSelectLink_browseSelectedUrl", solrSelectUrl);
prop.put("recrawljobrunning_simulationResult_showSelectLink", 1); } else {
prop.put("recrawljobrunning_simulationResult_showSelectLink_rows", maxRows); prop.put("recrawljobrunning_simulationResult_showSelectLink", 0);
prop.put("recrawljobrunning_simulationResult_showSelectLink_browseSelectedUrl", solrSelectUrl); }
} else { } catch (final IOException e) {
prop.put("recrawljobrunning_simulationResult_showSelectLink", 0); prop.put("recrawljobrunning_simulationResult", 2);
} ConcurrentLog.logException(e);
} catch (final IOException e) { }
prop.put("recrawljobrunning_simulationResult", 2); } else {
ConcurrentLog.logException(e); prop.put("recrawljobrunning_simulationResult", 3);
} }
} else { }
prop.put("recrawljobrunning_simulationResult", 3);
}
}
}
if(post.containsKey("recrawlDefaults")) { if(post.containsKey("recrawlDefaults")) {
recrawlQuery = RecrawlBusyThread.DEFAULT_QUERY; recrawlQuery = RecrawlBusyThread.DEFAULT_QUERY;

@ -72,7 +72,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
private boolean includefailed; private boolean includefailed;
private int chunkstart = 0; private int chunkstart = 0;
private final int chunksize; private final int chunksize = 100;
private final Switchboard sb; private final Switchboard sb;
/** buffer of urls to recrawl */ /** buffer of urls to recrawl */
@ -129,8 +129,7 @@ public class RecrawlBusyThread extends AbstractBusyThread {
this.urlstack = new HashSet<DigestURL>(); this.urlstack = new HashSet<DigestURL>();
// workaround to prevent solr exception on existing index (not fully reindexed) since intro of schema with docvalues // workaround to prevent solr exception on existing index (not fully reindexed) since intro of schema with docvalues
// org.apache.solr.core.SolrCore java.lang.IllegalStateException: unexpected docvalues type NONE for field 'load_date_dt' (expected=NUMERIC). Use UninvertingReader or index with docvalues. // org.apache.solr.core.SolrCore java.lang.IllegalStateException: unexpected docvalues type NONE for field 'load_date_dt' (expected=NUMERIC). Use UninvertingReader or index with docvalues.
this.solrSortBy = null; // CollectionSchema.load_date_dt.getSolrFieldName() + " asc"; solrSortBy = CollectionSchema.load_date_dt.getSolrFieldName() + " asc";
this.chunksize = sb.getConfigInt(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 200);
final SolrConnector solrConnector = this.sb.index.fulltext().getDefaultConnector(); final SolrConnector solrConnector = this.sb.index.fulltext().getDefaultConnector();
if (solrConnector != null && !solrConnector.isClosed()) { if (solrConnector != null && !solrConnector.isClosed()) {

Loading…
Cancel
Save