diff --git a/htroot/IndexReIndexMonitor_p.html b/htroot/IndexReIndexMonitor_p.html index fab09217e..51e15593c 100644 --- a/htroot/IndexReIndexMonitor_p.html +++ b/htroot/IndexReIndexMonitor_p.html @@ -70,13 +70,16 @@ #(recrawljobrunning)#
+ #(error)#:: + + #(/error)#
#(simulationResult)# :: - + :: :: diff --git a/htroot/IndexReIndexMonitor_p.java b/htroot/IndexReIndexMonitor_p.java index 83841a34e..9d3acac58 100644 --- a/htroot/IndexReIndexMonitor_p.java +++ b/htroot/IndexReIndexMonitor_p.java @@ -18,6 +18,9 @@ * . */ import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; @@ -134,42 +137,59 @@ public class IndexReIndexMonitor_p { if (recrawlbt == null || recrawlbt.shutdownInProgress()) { prop.put("recrawljobrunning_simulationResult", 0); - if (post.containsKey("recrawlnow") && sb.index.fulltext().connectedLocalSolr()) { - sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null, - new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000); - recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME); - - /* store this call as an api call for easy scheduling possibility */ - if(sb.tables != null) { - /* We avoid creating a duplicate of any already recorded API call with the same parameters */ - final Row lastExecutedCall = WorkTables - .selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb); - if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) { - byte[] lastExecutedCallPk = lastExecutedCall.getPK(); - if (lastExecutedCallPk != null) { - post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk)); - } - } - sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER, - "Recrawl documents matching selection query : " + recrawlQuery); - } + prop.put("recrawljobrunning_error", 0); + if(!sb.index.fulltext().connectedLocalSolr()) { + prop.put("recrawljobrunning_error", 1); // Re-crawl works only with an embedded local Solr index + } else { + if (post.containsKey("recrawlnow")) { + sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null, + new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000); + recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME); - } else if(post.containsKey("simulateRecrawl") && sb.index.fulltext().connectedLocalSolr()) { - SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector(); - if (!solrConnector.isClosed()) { - try { - // query all or only httpstatus=200 depending on includefailed flag - final long count = solrConnector.getCountByQuery(RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc)); - prop.put("recrawljobrunning_simulationResult", 1); - prop.put("recrawljobrunning_simulationResult_docCount", count); - } catch (final IOException e) { - prop.put("recrawljobrunning_simulationResult", 2); - ConcurrentLog.logException(e); - } - } else { - prop.put("recrawljobrunning_simulationResult", 3); - } - } else if(post.containsKey("recrawlDefaults")) { + /* store this call as an api call for easy scheduling possibility */ + if(sb.tables != null) { + /* We avoid creating a duplicate of any already recorded API call with the same parameters */ + final Row lastExecutedCall = WorkTables + .selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb); + if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) { + byte[] lastExecutedCallPk = lastExecutedCall.getPK(); + if (lastExecutedCallPk != null) { + post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk)); + } + } + sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER, + "Recrawl documents matching selection query : " + recrawlQuery); + } + } else if(post.containsKey("simulateRecrawl")) { + final SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector(); + if (!solrConnector.isClosed()) { + try { + // query all or only httpstatus=200 depending on includefailed flag + final String finalQuery = RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc); + final long count = solrConnector.getCountByQuery(finalQuery); + prop.put("recrawljobrunning_simulationResult", 1); + prop.put("recrawljobrunning_simulationResult_docCount", count); + if(count > 0) { + /* Got some results : add a link to the related solr select URL for easily browsing results */ + final int maxRows = 10; + final String solrSelectUrl = genLocalSolrSelectUrl(finalQuery, maxRows); + prop.put("recrawljobrunning_simulationResult_showSelectLink", 1); + prop.put("recrawljobrunning_simulationResult_showSelectLink_rows", maxRows); + prop.put("recrawljobrunning_simulationResult_showSelectLink_browseSelectedUrl", solrSelectUrl); + } else { + prop.put("recrawljobrunning_simulationResult_showSelectLink", 0); + } + } catch (final IOException e) { + prop.put("recrawljobrunning_simulationResult", 2); + ConcurrentLog.logException(e); + } + } else { + prop.put("recrawljobrunning_simulationResult", 3); + } + } + } + + if(post.containsKey("recrawlDefaults")) { recrawlQuery = RecrawlBusyThread.DEFAULT_QUERY; inclerrdoc = RecrawlBusyThread.DEFAULT_INCLUDE_FAILED; } @@ -209,6 +229,23 @@ public class IndexReIndexMonitor_p { return prop; } + /** + * @param query + * the Solr selection query. Must not be null and not + * percent-encoded. + * @return the URL of the select query targetting the local Solr + */ + protected static String genLocalSolrSelectUrl(final String query, final int maxRows) { + String urlEncodedQUery; + try { + urlEncodedQUery = URLEncoder.encode(query, StandardCharsets.UTF_8.name()); + } catch (final UnsupportedEncodingException e) { + // should not happen as UTF-8 must be supported on any Java platform + urlEncodedQUery = query; + } + return "solr/select?core=collection1&wt=html&start=0&rows=" + maxRows + "&q=" + urlEncodedQUery; + } + /** * Write information on the eventual currently running or last recrawl job terminated * @param header current request header. Must not be null.