Do not use Luke requests for remote Solr servers if the result is

different from that of normal query requests. This happens if the remote Solr is
actually a SolrCloud; in such cases the Luke request returns only the
result of a single Solr peer, not that of the whole cloud.
also done: some refactoring.
pull/1/head
Michael Peter Christen 11 years ago
parent 18a56446ce
commit 0f6b72f24b

@ -111,7 +111,7 @@ public class CrawlStartScanner_p
// get a list of all hosts in the index
ReversibleScoreMap<String> hostscore = null;
try {
hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
} catch (final IOException e) {}
if (hostscore != null) {
for (String s: hostscore) hostSet.add(s);

@ -156,7 +156,7 @@ public class HostBrowser {
if (admin && post.containsKey("deleteLoadErrors")) {
try {
fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
+ CollectionSchema.httpstatus_i.getSolrFieldName() + ":[* TO *]"); // make sure field exists
+ CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail");
@ -178,7 +178,7 @@ public class HostBrowser {
int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
// collect hosts from crawler
@ -269,7 +269,7 @@ public class HostBrowser {
}
} else {
if (facetcount > 1000 || post.containsKey("nepr")) {
q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]");
q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM);
}
}
BlockingQueue<SolrDocument> docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, 100000, TIMEOUT, 100,

@ -199,7 +199,7 @@ public class IndexDeletion_p {
if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) {
boolean simulate = post.containsKey("simulate-collectiondelete");
collectiondelete = collectiondelete.replaceAll(" ","").replaceAll(",", "|");
String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + ":[* TO *]" : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete);
String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + AbstractSolrConnector.CATCHALL_DTERM : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete);
if (simulate) {
try {
count = (int) defaultConnector.getCountByQuery(query);

@ -27,6 +27,7 @@
import java.io.IOException;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.Memory;
import net.yacy.crawler.CrawlSwitchboard;
@ -138,7 +139,7 @@ public class status_p {
long webgraphTimeSinceStart = processWebgraph && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[1] : 0;
long collectionRemainingCount = 0;
if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
long collectionCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[0] - collectionRemainingCount : 0;
int collectionSpeed = collectionTimeSinceStart == 0 ? 0 : (int) (60000 * collectionCountSinceStart / collectionTimeSinceStart); // pages per minute
long collectionRemainingTime = collectionSpeed == 0 ? 0 : 60000 * collectionRemainingCount / collectionSpeed; // millis
@ -146,7 +147,7 @@ public class status_p {
int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000);
long webgraphRemainingCount = 0;
if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0;
int webgraphSpeed = webgraphTimeSinceStart == 0 ? 0 : (int) (60000 * webgraphCountSinceStart / webgraphTimeSinceStart); // pages per minute
long webgraphRemainingTime = webgraphSpeed == 0 ? 0 : 60000 * webgraphRemainingCount / webgraphSpeed; // millis

@ -105,6 +105,7 @@ public class ASCII implements Comparator<String> {
}
public final static String String(final byte[] bytes) {
if (bytes == null) return null;
StringBuilder sb = new StringBuilder(bytes.length);
for (byte b : bytes) {
if (b < 0) throw new IllegalArgumentException();

@ -63,18 +63,20 @@ public abstract class AbstractSolrConnector implements SolrConnector {
public final static SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static String POISON_ID = "POISON_ID";
public final static String CATCHALL_TERM = "*:*";
public final static String CATCHALL_TERM = "[* TO *]";
public final static String CATCHALL_DTERM = ":" + CATCHALL_TERM;
public final static String CATCHALL_QUERY = "*:*";
public final static SolrQuery catchallQuery = new SolrQuery();
static {
catchallQuery.setQuery(CATCHALL_TERM);
catchallQuery.setQuery(CATCHALL_QUERY);
catchallQuery.setFields(CollectionSchema.id.getSolrFieldName());
catchallQuery.setRows(0);
catchallQuery.setStart(0);
}
public final static SolrQuery catchSuccessQuery = new SolrQuery();
static {
//catchSuccessQuery.setQuery("-" + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
catchSuccessQuery.setQuery(CATCHALL_TERM); // failreason_s is only available for core collection1
//catchSuccessQuery.setQuery("-" + CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
catchSuccessQuery.setQuery(CATCHALL_QUERY); // failreason_s is only available for core collection1
catchSuccessQuery.setFields(CollectionSchema.id.getSolrFieldName());
catchSuccessQuery.clearSorts();
catchSuccessQuery.setIncludeScore(false);
@ -200,7 +202,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override
public Iterator<String> iterator() {
final BlockingQueue<String> queue = concurrentIDsByQuery(CATCHALL_TERM, 0, Integer.MAX_VALUE, 60000);
final BlockingQueue<String> queue = concurrentIDsByQuery(CATCHALL_QUERY, 0, Integer.MAX_VALUE, 60000);
return new LookAheadIterator<String>() {
@Override
protected String next0() {

@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
@ -101,26 +102,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
}
}
/**
 * Get the number of segments, as reported by a luke request (without schema).
 * If the index info carries no "segmentCount" entry, one segment is assumed.
 * @return the number of segments, or 0 if unknown (no server, no luke response,
 *         no index info, or the request failed)
 */
public int getSegmentCount() {
    if (this.server == null) return 0;
    try {
        final LukeResponse lukeResponse = getIndexBrowser(false);
        // same guard as used for the document count: a missing response means "unknown",
        // it must not end up as a NullPointerException in the catch clause below
        if (lukeResponse == null) return 0;
        final NamedList<Object> info = lukeResponse.getIndexInfo();
        if (info == null) return 0;
        final Integer segmentCount = (Integer) info.get("segmentCount");
        if (segmentCount == null) return 1;
        return segmentCount.intValue();
    } catch (final Throwable e) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(e);
        return 0;
    }
}
@Override
public boolean isClosed() {
return this.server == null; // we cannot now this exactly when server != null, because SolrServer does not provide a method to test the close status
@ -144,22 +125,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
}
}
/**
 * Get the number of documents in the index, taken from a luke request (without schema).
 * @return the document count, or 0 if there is no server, no luke response,
 *         no document count in the response, or the request failed
 */
@Override
public long getSize() {
    if (this.server == null) return 0;
    try {
        final LukeResponse luke = getIndexBrowser(false);
        final Integer docCount = luke == null ? null : luke.getNumDocs();
        return docCount == null ? 0 : docCount.longValue();
    } catch (final Throwable t) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(t);
        return 0;
    }
}
/**
* delete everything in the solr index
* @throws IOException
@ -169,7 +134,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server == null) return;
synchronized (this.server) {
try {
this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_TERM);
this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_QUERY);
this.server.commit(true, true, false);
} catch (final Throwable e) {
clearCaches(); // prevent further OOM if this was caused by OOM
@ -345,44 +310,90 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
}
}
// luke requests: these do not work for attached SolrCloud Server
/**
 * Get all fields contained in the index, taken from a luke request (without schema).
 * @return the field info entries of the luke response
 * @throws SolrServerException if the luke request fails
 */
public Collection<FieldInfo> getFields() throws SolrServerException {
    final LukeResponse indexBrowser = getIndexBrowser(false);
    return indexBrowser.getFieldInfo().values();
}
/**
 * Get the number of segments, as reported by a luke request (without schema).
 * If the index info carries no "segmentCount" entry, one segment is assumed.
 * @return the number of segments, or 0 if unknown (no server, no luke response,
 *         no index info, or the request failed)
 */
public int getSegmentCount() {
    if (this.server == null) return 0;
    try {
        final LukeResponse lukeResponse = getIndexBrowser(false);
        // same guard as in getSizeLukeRequest: a missing response means "unknown",
        // it must not end up as a NullPointerException in the catch clause below
        if (lukeResponse == null) return 0;
        final NamedList<Object> info = lukeResponse.getIndexInfo();
        if (info == null) return 0;
        final Integer segmentCount = (Integer) info.get("segmentCount");
        if (segmentCount == null) return 1;
        return segmentCount.intValue();
    } catch (final Throwable e) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(e);
        return 0;
    }
}
/**
 * Remembers whether the luke request delivers the same size as a normal query request.
 * 3-value logic: 1=yes, -1=no, 0=dontknow
 */
private int useluke = 0;

/**
 * Get the number of documents in the index.
 * For a ServerShard the luke result is compared once with the result of a normal
 * query request; the luke request is only used afterwards if both agreed.
 * For every other server type the luke request is used directly.
 * @return the number of documents, or 0 if there is no server or the size could not be determined
 */
@Override
public long getSize() {
    if (this.server == null) return 0;
    if (this.server instanceof ServerShard) {
        // the server can be a single shard; we don't know here
        // to test that, we submit requests to both variants
        if (this.useluke == 1) return getSizeLukeRequest();
        if (this.useluke == -1) return getSizeQueryRequest();
        final long ls = getSizeLukeRequest();
        final long qs = getSizeQueryRequest();
        if (ls == 0 && qs == 0) {
            // inconclusive: both requests report 0 for an empty index and also when they fail,
            // so this comparison proves nothing; do not fix the useluke decision yet
            return 0;
        }
        if (ls == qs) {
            this.useluke = 1;
            return ls;
        }
        this.useluke = -1;
        return qs;
    }
    return getSizeLukeRequest();
}
/**
 * Get the number of documents by submitting the catchSuccessQuery as a normal query request.
 * @return the number of found documents, or 0 if there is no server, no response,
 *         no result list, or the request failed
 */
private long getSizeQueryRequest() {
    if (this.server == null) return 0;
    long found = 0;
    try {
        final QueryResponse response = getResponseByParams(AbstractSolrConnector.catchSuccessQuery);
        final SolrDocumentList results = response == null ? null : response.getResults();
        if (results != null) found = results.getNumFound();
    } catch (final Throwable t) {
        // a failed request is reported and counts as "nothing found"
        log.warn(t);
    }
    return found;
}
/**
 * Get the number of documents from a luke request (without schema).
 * @return the document count, or 0 if there is no server, no luke response,
 *         no document count in the response, or the request failed
 */
private long getSizeLukeRequest() {
    if (this.server == null) return 0;
    try {
        final LukeResponse luke = getIndexBrowser(false);
        final Integer docCount = luke == null ? null : luke.getNumDocs();
        return docCount == null ? 0 : docCount.longValue();
    } catch (final Throwable t) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(t);
        return 0;
    }
}
private LukeResponse getIndexBrowser(final boolean showSchema) throws SolrServerException {
// get all fields contained in index
final LukeRequest lukeRequest = new LukeRequest();
lukeRequest.setResponseParser(new XMLResponseParser());
lukeRequest.setNumTerms(0);
lukeRequest.setShowSchema(showSchema);
/*
final SolrRequest lukeRequest = new SolrRequest(METHOD.GET, "/admin/luke") {
private static final long serialVersionUID = 1L;
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return null;
}
@Override
public SolrParams getParams() {
ModifiableSolrParams params = new ModifiableSolrParams();
//params.add("numTerms", "1");
params.add("_", "" + System.currentTimeMillis()); // cheat a proxy
if (showSchema) params.add("show", "schema");
return params;
}
@Override
public LukeResponse process(SolrServer server) throws SolrServerException, IOException {
long startTime = System.currentTimeMillis();
LukeResponse res = new LukeResponse();
this.setResponseParser(new XMLResponseParser());
NamedList<Object> response = server.request(this);
res.setResponse(response);
res.setElapsedTime(System.currentTimeMillis() - startTime);
return res;
}
};
*/
LukeResponse lukeResponse = null;
try {
lukeResponse = lukeRequest.process(this.server);

@ -81,14 +81,14 @@ public class ResponseAccumulator {
for (Map.Entry<String, Object> e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue());
}
// accumulate the index (thats the result from a luke request)
// accumulate the index
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> index_counts = (SimpleOrderedMap<Object>) response.get("index");
if (index_counts != null) {
for (Map.Entry<String, Object> e: index_counts) index_countsAcc.add(e.getKey(), e.getValue());
}
// accumulate the fields (thats the result from a luke request)
// accumulate the fields
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> schema = (SimpleOrderedMap<Object>) response.get("schema");
if (schema != null) {

@ -102,6 +102,7 @@ import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.instance.RemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
@ -2311,17 +2312,17 @@ public final class Switchboard extends serverSwitch {
// run postprocessing on these profiles
postprocessingRunning = true;
postprocessingStartTime[0] = System.currentTimeMillis();
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, profileHash);
postprocessingStartTime[0] = 0;
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
if (processWebgraph) {
postprocessingStartTime[1] = System.currentTimeMillis();
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
for (String profileHash: deletionCandidates) proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, profileHash);
postprocessingStartTime[1] = 0;
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
}
this.crawler.cleanProfiles(deletionCandidates);
log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents");
@ -2329,17 +2330,17 @@ public final class Switchboard extends serverSwitch {
// run postprocessing on all profiles
postprocessingRunning = true;
postprocessingStartTime[0] = System.currentTimeMillis();
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, null);
postprocessingStartTime[0] = 0;
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
if (processWebgraph) {
postprocessingStartTime[1] = System.currentTimeMillis();
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, null);
postprocessingStartTime[1] = 0;
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
}
this.crawler.cleanProfiles(this.crawler.getActiveProfiles());
log.info("cleanup post-processed " + proccount + " documents");

@ -38,6 +38,7 @@ import org.apache.solr.common.params.CommonParams;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.search.index.Fulltext;
@ -65,7 +66,7 @@ public class ErrorCache {
params.setFacet(false);
params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
params.setFields(CollectionSchema.id.getSolrFieldName());
params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given
SolrDocumentList docList = fulltext.getDefaultConnector().getDocumentListByParams(params);
if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) {
@ -79,7 +80,7 @@ public class ErrorCache {
public void clear() throws IOException {
if (this.cache != null) synchronized (this.cache) {this.cache.clear();}
this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
}
public void removeHosts(final Set<String> hosthashes) {
@ -166,7 +167,7 @@ public class ErrorCache {
}
if (failDoc != null) return failDoc;
try {
final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + ":[* TO *]", 0, 1);
final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM, 0, 1);
if (docs == null || docs.isEmpty()) return null;
SolrDocument doc = docs.get(0);
if (doc == null) return null;

@ -376,7 +376,7 @@ public final class Fulltext {
* @param hosthashes
*/
public void deleteDomainErrors(final Set<String> hosthashes) {
deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
}
private static void deleteDomainWithConstraint(SolrConnector connector, String fieldname, final Set<String> hosthashes, String constraintQuery) {

@ -20,13 +20,18 @@ package net.yacy.search.index;
*/
import java.io.IOException;
import net.yacy.search.Switchboard;
import java.util.ArrayList;
import java.util.concurrent.Semaphore;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.search.schema.CollectionConfiguration;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
@ -92,7 +97,7 @@ import org.apache.solr.common.SolrInputDocument;
*/
public void addSelectFieldname(String field) {
if (field != null && !field.isEmpty()) {
querylist.add(field + ":[* TO *]");
querylist.add(field + AbstractSolrConnector.CATCHALL_DTERM);
}
}

@ -485,7 +485,7 @@ public class Segment {
final BlockingQueue<SolrDocument> docQueue;
final String urlstub;
if (stub == null) {
docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_TERM, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_QUERY, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
urlstub = null;
} else {
final String host = stub.getHost();

@ -36,6 +36,7 @@ import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.storage.HandleSet;
import net.yacy.document.parser.html.AbstractScraper;
@ -354,7 +355,7 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM + " OR ");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR ");
q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");

@ -924,7 +924,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// collect hosts from index which shall take part in citation computation
String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
ReversibleScoreMap<String> hostscore;
try {
Map<String, ReversibleScoreMap<String>> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName());
@ -950,7 +950,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// This shall fulfill the following requirement:
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]";
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
long patchquerycount = collectionConnector.getCountByQuery(patchquery);
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 600000, 100,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
@ -1065,7 +1065,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// process all documents in collection
query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id
Set<String> uniqueURLs = new HashSet<String>();
try {
@ -1311,7 +1311,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
ncr += d[0] / ilc;
} else {
// Output a warning that d[] is empty
ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + iid);
ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + ASCII.String(iid));
break;
}
}

@ -315,7 +315,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
// that means we must search for those entries.
webgraphConnector.commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 100);
SolrDocument doc;

Loading…
Cancel
Save