diff --git a/htroot/CrawlStartScanner_p.java b/htroot/CrawlStartScanner_p.java index 420657343..09072404a 100644 --- a/htroot/CrawlStartScanner_p.java +++ b/htroot/CrawlStartScanner_p.java @@ -111,7 +111,7 @@ public class CrawlStartScanner_p // get a list of all hosts in the index ReversibleScoreMap hostscore = null; try { - hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); + hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); } catch (final IOException e) {} if (hostscore != null) { for (String s: hostscore) hostSet.add(s); diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 43f1b4d4d..e904fd9c0 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -156,7 +156,7 @@ public class HostBrowser { if (admin && post.containsKey("deleteLoadErrors")) { try { fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " - + CollectionSchema.httpstatus_i.getSolrFieldName() + ":[* TO *]"); // make sure field exists + + CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200"); fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" ); ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail"); @@ -178,7 +178,7 @@ public class HostBrowser { int maxcount = admin ? 
2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums // collect hosts from index - ReversibleScoreMap hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); + ReversibleScoreMap hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); if (hostscore == null) hostscore = new ClusteredScoreMap(); // collect hosts from crawler @@ -269,7 +269,7 @@ public class HostBrowser { } } else { if (facetcount > 1000 || post.containsKey("nepr")) { - q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]"); + q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM); } } BlockingQueue docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, 100000, TIMEOUT, 100, diff --git a/htroot/IndexDeletion_p.java b/htroot/IndexDeletion_p.java index 3242a8643..89e7bba75 100644 --- a/htroot/IndexDeletion_p.java +++ b/htroot/IndexDeletion_p.java @@ -199,7 +199,7 @@ public class IndexDeletion_p { if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) { boolean simulate = post.containsKey("simulate-collectiondelete"); collectiondelete = collectiondelete.replaceAll(" ","").replaceAll(",", "|"); - String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + ":[* TO *]" : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete); + String query = collectiondelete_mode_unassigned_checked ? 
"-" + CollectionSchema.collection_sxt + AbstractSolrConnector.CATCHALL_DTERM : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete); if (simulate) { try { count = (int) defaultConnector.getCountByQuery(query); diff --git a/htroot/api/status_p.java b/htroot/api/status_p.java index 189159ffa..17435c07c 100644 --- a/htroot/api/status_p.java +++ b/htroot/api/status_p.java @@ -27,6 +27,7 @@ import java.io.IOException; +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.Memory; import net.yacy.crawler.CrawlSwitchboard; @@ -138,7 +139,7 @@ public class status_p { long webgraphTimeSinceStart = processWebgraph && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[1] : 0; long collectionRemainingCount = 0; - if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} long collectionCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[0] - collectionRemainingCount : 0; int collectionSpeed = collectionTimeSinceStart == 0 ? 0 : (int) (60000 * collectionCountSinceStart / collectionTimeSinceStart); // pages per minute long collectionRemainingTime = collectionSpeed == 0 ? 
0 : 60000 * collectionRemainingCount / collectionSpeed; // millis @@ -146,7 +147,7 @@ public class status_p { int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000); long webgraphRemainingCount = 0; - if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0; int webgraphSpeed = webgraphTimeSinceStart == 0 ? 0 : (int) (60000 * webgraphCountSinceStart / webgraphTimeSinceStart); // pages per minute long webgraphRemainingTime = webgraphSpeed == 0 ? 
0 : 60000 * webgraphRemainingCount / webgraphSpeed; // millis diff --git a/source/net/yacy/cora/document/encoding/ASCII.java b/source/net/yacy/cora/document/encoding/ASCII.java index fe4d88b07..17ded713a 100644 --- a/source/net/yacy/cora/document/encoding/ASCII.java +++ b/source/net/yacy/cora/document/encoding/ASCII.java @@ -105,6 +105,7 @@ public class ASCII implements Comparator { } public final static String String(final byte[] bytes) { + if (bytes == null) return null; StringBuilder sb = new StringBuilder(bytes.length); for (byte b : bytes) { if (b < 0) throw new IllegalArgumentException(); diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 521433886..fee62dcc0 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -63,18 +63,20 @@ public abstract class AbstractSolrConnector implements SolrConnector { public final static SolrDocument POISON_DOCUMENT = new SolrDocument(); public final static String POISON_ID = "POISON_ID"; - public final static String CATCHALL_TERM = "*:*"; + public final static String CATCHALL_TERM = "[* TO *]"; + public final static String CATCHALL_DTERM = ":" + CATCHALL_TERM; + public final static String CATCHALL_QUERY = "*:*"; public final static SolrQuery catchallQuery = new SolrQuery(); static { - catchallQuery.setQuery(CATCHALL_TERM); + catchallQuery.setQuery(CATCHALL_QUERY); catchallQuery.setFields(CollectionSchema.id.getSolrFieldName()); catchallQuery.setRows(0); catchallQuery.setStart(0); } public final static SolrQuery catchSuccessQuery = new SolrQuery(); static { - //catchSuccessQuery.setQuery("-" + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); - catchSuccessQuery.setQuery(CATCHALL_TERM); // failreason_s is only available for core collection1 + //catchSuccessQuery.setQuery("-" + 
CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); + catchSuccessQuery.setQuery(CATCHALL_QUERY); // failreason_s is only available for core collection1 catchSuccessQuery.setFields(CollectionSchema.id.getSolrFieldName()); catchSuccessQuery.clearSorts(); catchSuccessQuery.setIncludeScore(false); @@ -200,7 +202,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { @Override public Iterator iterator() { - final BlockingQueue queue = concurrentIDsByQuery(CATCHALL_TERM, 0, Integer.MAX_VALUE, 60000); + final BlockingQueue queue = concurrentIDsByQuery(CATCHALL_QUERY, 0, Integer.MAX_VALUE, 60000); return new LookAheadIterator() { @Override protected String next0() { diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index 4a5ed0ffe..7dba6ae01 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import net.yacy.cora.federate.solr.instance.ServerShard; import net.yacy.cora.util.ConcurrentLog; import net.yacy.search.schema.CollectionSchema; @@ -100,26 +101,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen } } } - - /** - * get the number of segments. 
- * @return the number of segments, or 0 if unknown - */ - public int getSegmentCount() { - if (this.server == null) return 0; - try { - LukeResponse lukeResponse = getIndexBrowser(false); - NamedList info = lukeResponse.getIndexInfo(); - if (info == null) return 0; - Integer segmentCount = (Integer) info.get("segmentCount"); - if (segmentCount == null) return 1; - return segmentCount.intValue(); - } catch (final Throwable e) { - clearCaches(); // prevent further OOM if this was caused by OOM - log.warn(e); - return 0; - } - } @Override public boolean isClosed() { @@ -144,22 +125,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen } } - @Override - public long getSize() { - if (this.server == null) return 0; - try { - LukeResponse lukeResponse = getIndexBrowser(false); - if (lukeResponse == null) return 0; - Integer numDocs = lukeResponse.getNumDocs(); - if (numDocs == null) return 0; - return numDocs.longValue(); - } catch (final Throwable e) { - clearCaches(); // prevent further OOM if this was caused by OOM - log.warn(e); - return 0; - } - } - /** * delete everything in the solr index * @throws IOException @@ -169,7 +134,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen if (this.server == null) return; synchronized (this.server) { try { - this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_TERM); + this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_QUERY); this.server.commit(true, true, false); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM @@ -345,10 +310,83 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen } } + // luke requests: these do not work for attached SolrCloud Server + public Collection getFields() throws SolrServerException { // get all fields contained in index return getIndexBrowser(false).getFieldInfo().values(); } + + /** + * get the number of segments. 
+ * @return the number of segments, or 0 if unknown + */ + public int getSegmentCount() { + if (this.server == null) return 0; + try { + LukeResponse lukeResponse = getIndexBrowser(false); + NamedList info = lukeResponse.getIndexInfo(); + if (info == null) return 0; + Integer segmentCount = (Integer) info.get("segmentCount"); + if (segmentCount == null) return 1; + return segmentCount.intValue(); + } catch (final Throwable e) { + clearCaches(); // prevent further OOM if this was caused by OOM + log.warn(e); + return 0; + } + } + + private int useluke = 0; // 3-value logic: 1=yes, -1=no, 0=dontknow + + @Override + public long getSize() { + if (this.server == null) return 0; + if (this.server instanceof ServerShard) { + // the server can be a single shard; we don't know here + // to test that, we submit requests to both variants + if (useluke == 1) return getSizeLukeRequest(); + if (useluke == -1) return getSizeQueryRequest(); + long ls = getSizeLukeRequest(); + long qs = getSizeQueryRequest(); + if (ls == qs) { + useluke = 1; + return ls; + } + useluke = -1; + return qs; + } + return getSizeLukeRequest(); + } + + private long getSizeQueryRequest() { + if (this.server == null) return 0; + try { + final QueryResponse rsp = getResponseByParams(AbstractSolrConnector.catchSuccessQuery); + if (rsp == null) return 0; + final SolrDocumentList docs = rsp.getResults(); + if (docs == null) return 0; + return docs.getNumFound(); + } catch (final Throwable e) { + log.warn(e); + return 0; + } + } + + private long getSizeLukeRequest() { + if (this.server == null) return 0; + try { + LukeResponse lukeResponse = getIndexBrowser(false); + if (lukeResponse == null) return 0; + Integer numDocs = lukeResponse.getNumDocs(); + if (numDocs == null) return 0; + return numDocs.longValue(); + } catch (final Throwable e) { + clearCaches(); // prevent further OOM if this was caused by OOM + log.warn(e); + return 0; + } + } private LukeResponse getIndexBrowser(final boolean showSchema) throws
SolrServerException { // get all fields contained in index @@ -356,33 +394,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen lukeRequest.setResponseParser(new XMLResponseParser()); lukeRequest.setNumTerms(0); lukeRequest.setShowSchema(showSchema); - /* - final SolrRequest lukeRequest = new SolrRequest(METHOD.GET, "/admin/luke") { - private static final long serialVersionUID = 1L; - @Override - public Collection getContentStreams() throws IOException { - return null; - } - @Override - public SolrParams getParams() { - ModifiableSolrParams params = new ModifiableSolrParams(); - //params.add("numTerms", "1"); - params.add("_", "" + System.currentTimeMillis()); // cheat a proxy - if (showSchema) params.add("show", "schema"); - return params; - } - @Override - public LukeResponse process(SolrServer server) throws SolrServerException, IOException { - long startTime = System.currentTimeMillis(); - LukeResponse res = new LukeResponse(); - this.setResponseParser(new XMLResponseParser()); - NamedList response = server.request(this); - res.setResponse(response); - res.setElapsedTime(System.currentTimeMillis() - startTime); - return res; - } - }; - */ LukeResponse lukeResponse = null; try { lukeResponse = lukeRequest.process(this.server); diff --git a/source/net/yacy/cora/federate/solr/instance/ResponseAccumulator.java b/source/net/yacy/cora/federate/solr/instance/ResponseAccumulator.java index f0317d6c5..a27b08cc0 100644 --- a/source/net/yacy/cora/federate/solr/instance/ResponseAccumulator.java +++ b/source/net/yacy/cora/federate/solr/instance/ResponseAccumulator.java @@ -81,14 +81,14 @@ public class ResponseAccumulator { for (Map.Entry e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue()); } - // accumulate the index (thats the result from a luke request) + // accumulate the index @SuppressWarnings("unchecked") SimpleOrderedMap index_counts = (SimpleOrderedMap) response.get("index"); if (index_counts != null) { for (Map.Entry e: 
index_counts) index_countsAcc.add(e.getKey(), e.getValue()); } - // accumulate the fields (thats the result from a luke request) + // accumulate the fields @SuppressWarnings("unchecked") SimpleOrderedMap schema = (SimpleOrderedMap) response.get("schema"); if (schema != null) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index c63e5f3b1..912f317e7 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -102,6 +102,7 @@ import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.SchemaConfiguration; +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.instance.RemoteInstance; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.order.Base64Order; @@ -2311,17 +2312,17 @@ public final class Switchboard extends serverSwitch { // run postprocessing on these profiles postprocessingRunning = true; postprocessingStartTime[0] = System.currentTimeMillis(); - try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, profileHash); postprocessingStartTime[0] = 0; - try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know + try {postprocessingCount[0] = (int) 
fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know if (processWebgraph) { postprocessingStartTime[1] = System.currentTimeMillis(); - try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} for (String profileHash: deletionCandidates) proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, profileHash); postprocessingStartTime[1] = 0; - try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} } this.crawler.cleanProfiles(deletionCandidates); log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents"); @@ -2329,17 +2330,17 @@ public final class Switchboard extends serverSwitch { // run postprocessing on all profiles postprocessingRunning = true; postprocessingStartTime[0] = System.currentTimeMillis(); - try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} proccount += collection1Configuration.postprocessing(index, 
rrCache, clickdepthCache, null); postprocessingStartTime[0] = 0; - try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know + try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know if (processWebgraph) { postprocessingStartTime[1] = System.currentTimeMillis(); - try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, null); postprocessingStartTime[1] = 0; - try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} + try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} } this.crawler.cleanProfiles(this.crawler.getActiveProfiles()); log.info("cleanup post-processed " + proccount + " documents"); diff --git a/source/net/yacy/search/index/ErrorCache.java b/source/net/yacy/search/index/ErrorCache.java index 9a69c37e8..9fec1047e 100644 --- a/source/net/yacy/search/index/ErrorCache.java +++ b/source/net/yacy/search/index/ErrorCache.java @@ -38,6 +38,7 @@ import org.apache.solr.common.params.CommonParams; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import 
net.yacy.cora.federate.solr.FailCategory; +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.CrawlProfile; import net.yacy.search.index.Fulltext; @@ -65,7 +66,7 @@ public class ErrorCache { params.setFacet(false); params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); params.setFields(CollectionSchema.id.getSolrFieldName()); - params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); + params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given SolrDocumentList docList = fulltext.getDefaultConnector().getDocumentListByParams(params); if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) { @@ -79,7 +80,7 @@ public class ErrorCache { public void clear() throws IOException { if (this.cache != null) synchronized (this.cache) {this.cache.clear();} - this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); + this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); } public void removeHosts(final Set hosthashes) { @@ -166,7 +167,7 @@ public class ErrorCache { } if (failDoc != null) return failDoc; try { - final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + ":[* TO *]", 0, 1); + final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM, 0, 1); if (docs == null || docs.isEmpty()) return null; 
SolrDocument doc = docs.get(0); if (doc == null) return null; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 63954f37c..9b1463735 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -376,7 +376,7 @@ public final class Fulltext { * @param hosthashes */ public void deleteDomainErrors(final Set hosthashes) { - deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); + deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); } private static void deleteDomainWithConstraint(SolrConnector connector, String fieldname, final Set hosthashes, String constraintQuery) { diff --git a/source/net/yacy/search/index/ReindexSolrBusyThread.java b/source/net/yacy/search/index/ReindexSolrBusyThread.java index 91e5e61c7..c176ceea0 100644 --- a/source/net/yacy/search/index/ReindexSolrBusyThread.java +++ b/source/net/yacy/search/index/ReindexSolrBusyThread.java @@ -20,13 +20,18 @@ package net.yacy.search.index; */ import java.io.IOException; + import net.yacy.search.Switchboard; + import java.util.ArrayList; import java.util.concurrent.Semaphore; + +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.util.ConcurrentLog; import net.yacy.kelondro.workflow.AbstractBusyThread; import net.yacy.search.schema.CollectionConfiguration; + import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; @@ -92,7 +97,7 @@ import org.apache.solr.common.SolrInputDocument; */ public void addSelectFieldname(String field) { if (field != null && 
!field.isEmpty()) { - querylist.add(field + ":[* TO *]"); + querylist.add(field + AbstractSolrConnector.CATCHALL_DTERM); } } diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 976a59ca1..2a282e219 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -485,7 +485,7 @@ public class Segment { final BlockingQueue docQueue; final String urlstub; if (stub == null) { - docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_TERM, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); + docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_QUERY, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); urlstub = null; } else { final String host = stub.getHost(); diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index 0b0e5acaa..d61171abf 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -36,6 +36,7 @@ import net.yacy.cora.document.WordCache; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.SchemaDeclaration; import net.yacy.cora.federate.solr.SolrType; +import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.order.NaturalOrder; import net.yacy.cora.storage.HandleSet; import net.yacy.document.parser.html.AbstractScraper; @@ -354,7 +355,7 @@ public class QueryGoal { // add filter to prevent that results come from failed urls q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND ("); - q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR "); + 
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM + " OR "); q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))"); diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index fb0f28ac0..85451fbe9 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -924,7 +924,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // collect hosts from index which shall take part in citation computation String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + - CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; + CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; ReversibleScoreMap hostscore; try { Map> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()); @@ -950,7 +950,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // This shall fulfill the following requirement: // If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C. 
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links - String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]"; + String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; long patchquerycount = collectionConnector.getCountByQuery(patchquery); BlockingQueue documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 600000, 100, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName()); @@ -1065,7 +1065,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // process all documents in collection query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? 
"" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + - CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; + CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; Map hostExtentCache = new HashMap(); // a mapping from the host id to the number of documents which contain this host-id Set uniqueURLs = new HashSet(); try { @@ -1311,7 +1311,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri ncr += d[0] / ilc; } else { // Output a warning that d[] is empty - ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + iid); + ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + ASCII.String(iid)); break; } } diff --git a/source/net/yacy/search/schema/WebgraphConfiguration.java b/source/net/yacy/search/schema/WebgraphConfiguration.java index 1694294f1..9ab160cba 100644 --- a/source/net/yacy/search/schema/WebgraphConfiguration.java +++ b/source/net/yacy/search/schema/WebgraphConfiguration.java @@ -315,7 +315,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial // that means we must search for those entries. webgraphConnector.commit(true); // make sure that we have latest information that can be found //BlockingQueue docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10); - String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; + String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM; BlockingQueue docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 100); SolrDocument doc;