Add a query link to local Solr to browse selected recrawl candidates

pull/154/head
luccioman 7 years ago
parent 59f7763af6
commit d95d393a0d

@ -70,13 +70,16 @@
#(recrawljobrunning)#
<fieldset>
#(error)#::
<div class="alert alert-danger" role="alert">Re-crawl works only with an embedded local Solr index!</div>
#(/error)#
<div class="form-group">
<label>Solr query <input type="text" name="recrawlquerytext" size="40" value="#[recrawlquerytext]#" /></label>
<input type="submit" name="simulateRecrawl" value="Simulate" class="btn btn-default" title="Check only how many documents would be selected for recrawl"/>
</div>
#(simulationResult)#
::
<div class="alert alert-success" role="alert">#[docCount]# documents selected for recrawl.</div>
<div class="alert alert-success" role="alert">#[docCount]# #(showSelectLink)#document(s)::<a href="#[browseSelectedUrl]#" target="recrawlCandidates" title="Browse metadata of the #[rows]# first selected documents">document(s)</a>#(/showSelectLink)# selected for recrawl.</div>
::
<div class="alert alert-danger" role="alert">An error occurred when trying to run the selection query.</div>
::

@ -18,6 +18,9 @@
* <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
@ -134,42 +137,59 @@ public class IndexReIndexMonitor_p {
if (recrawlbt == null || recrawlbt.shutdownInProgress()) {
prop.put("recrawljobrunning_simulationResult", 0);
if (post.containsKey("recrawlnow") && sb.index.fulltext().connectedLocalSolr()) {
sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null,
new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000);
recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME);
/* store this call as an api call for easy scheduling possibility */
if(sb.tables != null) {
/* We avoid creating a duplicate of any already recorded API call with the same parameters */
final Row lastExecutedCall = WorkTables
.selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb);
if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) {
byte[] lastExecutedCallPk = lastExecutedCall.getPK();
if (lastExecutedCallPk != null) {
post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk));
}
}
sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER,
"Recrawl documents matching selection query : " + recrawlQuery);
}
prop.put("recrawljobrunning_error", 0);
if(!sb.index.fulltext().connectedLocalSolr()) {
prop.put("recrawljobrunning_error", 1); // Re-crawl works only with an embedded local Solr index
} else {
if (post.containsKey("recrawlnow")) {
sb.deployThread(RecrawlBusyThread.THREAD_NAME, "ReCrawl", "recrawl existing documents", null,
new RecrawlBusyThread(Switchboard.getSwitchboard(), recrawlQuery, inclerrdoc), 1000);
recrawlbt = sb.getThread(RecrawlBusyThread.THREAD_NAME);
} else if(post.containsKey("simulateRecrawl") && sb.index.fulltext().connectedLocalSolr()) {
SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector();
if (!solrConnector.isClosed()) {
try {
// query all or only httpstatus=200 depending on includefailed flag
final long count = solrConnector.getCountByQuery(RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc));
prop.put("recrawljobrunning_simulationResult", 1);
prop.put("recrawljobrunning_simulationResult_docCount", count);
} catch (final IOException e) {
prop.put("recrawljobrunning_simulationResult", 2);
ConcurrentLog.logException(e);
}
} else {
prop.put("recrawljobrunning_simulationResult", 3);
}
} else if(post.containsKey("recrawlDefaults")) {
/* store this call as an api call for easy scheduling possibility */
if(sb.tables != null) {
/* We avoid creating a duplicate of any already recorded API call with the same parameters */
final Row lastExecutedCall = WorkTables
.selectLastExecutedApiCall(IndexReIndexMonitor_p.SERVLET_NAME, post, sb);
if (lastExecutedCall != null && !post.containsKey(WorkTables.TABLE_API_COL_APICALL_PK)) {
byte[] lastExecutedCallPk = lastExecutedCall.getPK();
if (lastExecutedCallPk != null) {
post.add(WorkTables.TABLE_API_COL_APICALL_PK, UTF8.String(lastExecutedCallPk));
}
}
sb.tables.recordAPICall(post, IndexReIndexMonitor_p.SERVLET_NAME, WorkTables.TABLE_API_TYPE_CRAWLER,
"Recrawl documents matching selection query : " + recrawlQuery);
}
} else if(post.containsKey("simulateRecrawl")) {
final SolrConnector solrConnector = sb.index.fulltext().getDefaultConnector();
if (!solrConnector.isClosed()) {
try {
// query all or only httpstatus=200 depending on includefailed flag
final String finalQuery = RecrawlBusyThread.buildSelectionQuery(recrawlQuery, inclerrdoc);
final long count = solrConnector.getCountByQuery(finalQuery);
prop.put("recrawljobrunning_simulationResult", 1);
prop.put("recrawljobrunning_simulationResult_docCount", count);
if(count > 0) {
/* Got some results : add a link to the related solr select URL for easily browsing results */
final int maxRows = 10;
final String solrSelectUrl = genLocalSolrSelectUrl(finalQuery, maxRows);
prop.put("recrawljobrunning_simulationResult_showSelectLink", 1);
prop.put("recrawljobrunning_simulationResult_showSelectLink_rows", maxRows);
prop.put("recrawljobrunning_simulationResult_showSelectLink_browseSelectedUrl", solrSelectUrl);
} else {
prop.put("recrawljobrunning_simulationResult_showSelectLink", 0);
}
} catch (final IOException e) {
prop.put("recrawljobrunning_simulationResult", 2);
ConcurrentLog.logException(e);
}
} else {
prop.put("recrawljobrunning_simulationResult", 3);
}
}
}
if(post.containsKey("recrawlDefaults")) {
recrawlQuery = RecrawlBusyThread.DEFAULT_QUERY;
inclerrdoc = RecrawlBusyThread.DEFAULT_INCLUDE_FAILED;
}
@ -209,6 +229,23 @@ public class IndexReIndexMonitor_p {
return prop;
}
/**
* @param query
* the Solr selection query. Must not be null and not
* percent-encoded.
* @return the URL of the select query targetting the local Solr
*/
protected static String genLocalSolrSelectUrl(final String query, final int maxRows) {
String urlEncodedQUery;
try {
urlEncodedQUery = URLEncoder.encode(query, StandardCharsets.UTF_8.name());
} catch (final UnsupportedEncodingException e) {
// should not happen as UTF-8 must be supported on any Java platform
urlEncodedQUery = query;
}
return "solr/select?core=collection1&wt=html&start=0&rows=" + maxRows + "&q=" + urlEncodedQUery;
}
/**
* Write information on the eventual currently running or last recrawl job terminated
* @param header current request header. Must not be null.

Loading…
Cancel
Save