diff --git a/htroot/IndexReIndexMonitor_p.html b/htroot/IndexReIndexMonitor_p.html
index ba8a2f939..cb5a179f4 100644
--- a/htroot/IndexReIndexMonitor_p.html
+++ b/htroot/IndexReIndexMonitor_p.html
@@ -45,7 +45,14 @@
#(reindexjobrunning)#::
#(/reindexjobrunning)#
diff --git a/htroot/IndexReIndexMonitor_p.java b/htroot/IndexReIndexMonitor_p.java
index 00c56f35a..1c4da9fec 100644
--- a/htroot/IndexReIndexMonitor_p.java
+++ b/htroot/IndexReIndexMonitor_p.java
@@ -18,6 +18,7 @@
* .
*/
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.sorting.OrderedScoreMap;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.migration;
@@ -54,15 +55,17 @@ public class IndexReIndexMonitor_p {
prop.put("docsprocessed", ((ReindexSolrBusyThread) bt).getProcessed());
prop.put("currentselectquery","q="+((ReindexSolrBusyThread) bt).getCurrentQuery());
// prepare list of fields in queue
- final String[] querylist = ((ReindexSolrBusyThread) bt).getQueryList();
+ final OrderedScoreMap querylist = ((ReindexSolrBusyThread) bt).getQueryList();
if (querylist != null) {
- String allfieldnames = "";
+ int i = 0;
for (String oneqs : querylist) { // just use fieldname from query (fieldname:[* TO *])
- allfieldnames = allfieldnames + oneqs.substring(0, oneqs.indexOf(':')) + "
";
+ prop.put("reindexjobrunning_fieldlist_"+i+"_fieldname", oneqs.substring(0, oneqs.indexOf(':')));
+ prop.put("reindexjobrunning_fieldlist_"+i+"_fieldscore", querylist.get(oneqs));
+ i++;
}
- prop.put("reindexjobrunning_fieldlist", allfieldnames);
+ prop.put("reindexjobrunning_fieldlist", querylist.size());
} else {
- prop.put("reindexjobrunning_fieldlist", "");
+ prop.put("reindexjobrunning_fieldlist", 0);
}
}
diff --git a/source/net/yacy/search/index/ReindexSolrBusyThread.java b/source/net/yacy/search/index/ReindexSolrBusyThread.java
index 94ff69069..b6aaa05d5 100644
--- a/source/net/yacy/search/index/ReindexSolrBusyThread.java
+++ b/source/net/yacy/search/index/ReindexSolrBusyThread.java
@@ -21,15 +21,14 @@ package net.yacy.search.index;
import java.io.IOException;
-import net.yacy.search.Switchboard;
-
-import java.util.ArrayList;
import java.util.concurrent.Semaphore;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
+import net.yacy.cora.sorting.OrderedScoreMap;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.workflow.AbstractBusyThread;
+import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionConfiguration;
import org.apache.solr.common.SolrDocument;
@@ -47,7 +46,10 @@ import org.apache.solr.common.SolrInputDocument;
* If queue is empty this removes itself from list of servers workerthreads list
* Process: - initialize with one or more select queries
* - deploy as BusyThread (or call job repeatedly until it returns false)
- * - job reindexes on each call chunk of 100 documents
+ * - job reindexes on each call chunk of 100 documents
+ *
+ * The thread uses internally a score map for the reindex queries this promotes fields with a low
+ * number of documents to get reindexed first.
*/
public class ReindexSolrBusyThread extends AbstractBusyThread {
@@ -56,7 +58,8 @@ import org.apache.solr.common.SolrInputDocument;
int processed = 0; // total number of reindexed documents
int docstoreindex = 0; // documents found to reindex for current query
Semaphore sem = new Semaphore(1);
- ArrayList querylist = new ArrayList(); // list of select statements to reindex
+ OrderedScoreMap querylist = new OrderedScoreMap(null); // list of select statements to reindex with number of documents as score
+ String currentquery = null;
int start = 0; // startindex
int chunksize = 100; // number of documents to reindex per cycle
@@ -72,7 +75,7 @@ import org.apache.solr.common.SolrInputDocument;
this.interrupt(); // only one active reindex job should exist
} else {
if (query != null) {
- this.querylist.add(query);
+ this.querylist.set(query, 0);
}
}
setName("reindexSolr");
@@ -85,7 +88,7 @@ import org.apache.solr.common.SolrInputDocument;
*/
public void addSelectQuery(String query) {
if (query != null && !query.isEmpty()) {
- querylist.add(query);
+ querylist.set(query, 0);
}
}
@@ -97,7 +100,7 @@ import org.apache.solr.common.SolrInputDocument;
*/
public void addSelectFieldname(String field) {
if (field != null && !field.isEmpty()) {
- querylist.add(field + AbstractSolrConnector.CATCHALL_DTERM);
+ querylist.set(field + AbstractSolrConnector.CATCHALL_DTERM, 0);
}
}
@@ -108,29 +111,30 @@ import org.apache.solr.common.SolrInputDocument;
@Override
public boolean job() {
boolean ret = true;
- if (esc != null && colcfg != null && querylist.size() > 0) {
+ if (esc != null && colcfg != null && !querylist.isEmpty()) {
- if (sem.tryAcquire()) {
+ if (sem.tryAcquire()) { // allow only one working cycle
try {
- String query = querylist.get(0);
- SolrDocumentList xdocs = esc.getDocumentListByQuery(query, null, start, chunksize);
- docstoreindex = (int) xdocs.getNumFound();
+ currentquery = querylist.keys(true).next(); // get next query with lowest number of documents found
+ SolrDocumentList xdocs = esc.getDocumentListByQuery(currentquery, null, start, chunksize);
if (xdocs.size() == 0) { // no documents returned = all of current query reindexed (or eventual start to large)
- querylist.remove(0); // consider normal case and remove current query
-
+
if (start > 0) { // if previous cycle reindexed, commit to prevent reindex of same documents
esc.commit(true);
start = 0;
+ } else { // if start == 0 and nothing found, query can be deleted for sure
+ querylist.delete(currentquery); // remove current query
}
if (chunksize < 100) { // try to increase chunksize (if reduced by freemem)
chunksize = chunksize + 10;
}
} else {
- ConcurrentLog.info("MIGRATION-REINDEX", "reindex docs with query=" + query + " found=" + docstoreindex + " start=" + start);
+ docstoreindex = (int) xdocs.getNumFound();
+ ConcurrentLog.info("MIGRATION-REINDEX", "reindex docs with query=" + currentquery + " found=" + docstoreindex + " start=" + start);
start = start + chunksize;
-
+ querylist.set(currentquery, docstoreindex);
for (SolrDocument doc : xdocs) {
SolrInputDocument idoc = colcfg.toSolrInputDocument(doc);
Switchboard.getSwitchboard().index.putDocument(idoc);
@@ -138,7 +142,8 @@ import org.apache.solr.common.SolrInputDocument;
}
}
} catch (final IOException ex) {
- ConcurrentLog.warn("MIGRATION-REINDEX", "remove following query from list due to error, q=" + querylist.remove(0));
+ ConcurrentLog.warn("MIGRATION-REINDEX", "remove following query from list due to error, q=" + currentquery);
+ querylist.delete(currentquery);
ConcurrentLog.logException(ex);
} finally {
sem.release();
@@ -177,19 +182,14 @@ import org.apache.solr.common.SolrInputDocument;
* @return the currently processed Solr select query
*/
public String getCurrentQuery() {
- return querylist.isEmpty() ? "" : querylist.get(0);
+ return querylist.isEmpty() ? "" : currentquery;
}
/**
* @return copy of all Solr select queries in the queue or null if empty
*/
- public String[] getQueryList() {
- if (querylist != null) {
- String[] list = new String[querylist.size()];
- list = querylist.toArray(list);
- return list;
- }
- return null;
+ public OrderedScoreMap getQueryList() {
+ return querylist;
}
/**