Do not use Luke requests for remote Solr servers if the result is

different from that of normal query requests. This happens if the remote Solr is
actually a SolrCloud; in such cases the Luke request returns only the
result of a single Solr peer, not that of the whole cloud.
Also done: some refactoring.
pull/1/head
Michael Peter Christen 11 years ago
parent 18a56446ce
commit 0f6b72f24b

@ -111,7 +111,7 @@ public class CrawlStartScanner_p
// get a list of all hosts in the index // get a list of all hosts in the index
ReversibleScoreMap<String> hostscore = null; ReversibleScoreMap<String> hostscore = null;
try { try {
hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); hostscore = sb.index.fulltext().getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, 1000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
} catch (final IOException e) {} } catch (final IOException e) {}
if (hostscore != null) { if (hostscore != null) {
for (String s: hostscore) hostSet.add(s); for (String s: hostscore) hostSet.add(s);

@ -156,7 +156,7 @@ public class HostBrowser {
if (admin && post.containsKey("deleteLoadErrors")) { if (admin && post.containsKey("deleteLoadErrors")) {
try { try {
fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
+ CollectionSchema.httpstatus_i.getSolrFieldName() + ":[* TO *]"); // make sure field exists + CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200"); ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" ); fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail"); ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail");
@ -178,7 +178,7 @@ public class HostBrowser {
int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index // collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_TERM, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(AbstractSolrConnector.CATCHALL_QUERY, maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>(); if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
// collect hosts from crawler // collect hosts from crawler
@ -269,7 +269,7 @@ public class HostBrowser {
} }
} else { } else {
if (facetcount > 1000 || post.containsKey("nepr")) { if (facetcount > 1000 || post.containsKey("nepr")) {
q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(":[* TO *]"); q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM);
} }
} }
BlockingQueue<SolrDocument> docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, 100000, TIMEOUT, 100, BlockingQueue<SolrDocument> docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(q.toString(), 0, 100000, TIMEOUT, 100,

@ -199,7 +199,7 @@ public class IndexDeletion_p {
if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) { if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) {
boolean simulate = post.containsKey("simulate-collectiondelete"); boolean simulate = post.containsKey("simulate-collectiondelete");
collectiondelete = collectiondelete.replaceAll(" ","").replaceAll(",", "|"); collectiondelete = collectiondelete.replaceAll(" ","").replaceAll(",", "|");
String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + ":[* TO *]" : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete); String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + AbstractSolrConnector.CATCHALL_DTERM : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete);
if (simulate) { if (simulate) {
try { try {
count = (int) defaultConnector.getCountByQuery(query); count = (int) defaultConnector.getCountByQuery(query);

@ -27,6 +27,7 @@
import java.io.IOException; import java.io.IOException;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.Memory; import net.yacy.cora.util.Memory;
import net.yacy.crawler.CrawlSwitchboard; import net.yacy.crawler.CrawlSwitchboard;
@ -138,7 +139,7 @@ public class status_p {
long webgraphTimeSinceStart = processWebgraph && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[1] : 0; long webgraphTimeSinceStart = processWebgraph && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[1] : 0;
long collectionRemainingCount = 0; long collectionRemainingCount = 0;
if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} if (processCollection) try {collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
long collectionCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[0] - collectionRemainingCount : 0; long collectionCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[0] - collectionRemainingCount : 0;
int collectionSpeed = collectionTimeSinceStart == 0 ? 0 : (int) (60000 * collectionCountSinceStart / collectionTimeSinceStart); // pages per minute int collectionSpeed = collectionTimeSinceStart == 0 ? 0 : (int) (60000 * collectionCountSinceStart / collectionTimeSinceStart); // pages per minute
long collectionRemainingTime = collectionSpeed == 0 ? 0 : 60000 * collectionRemainingCount / collectionSpeed; // millis long collectionRemainingTime = collectionSpeed == 0 ? 0 : 60000 * collectionRemainingCount / collectionSpeed; // millis
@ -146,7 +147,7 @@ public class status_p {
int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000); int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000);
long webgraphRemainingCount = 0; long webgraphRemainingCount = 0;
if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0; long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0;
int webgraphSpeed = webgraphTimeSinceStart == 0 ? 0 : (int) (60000 * webgraphCountSinceStart / webgraphTimeSinceStart); // pages per minute int webgraphSpeed = webgraphTimeSinceStart == 0 ? 0 : (int) (60000 * webgraphCountSinceStart / webgraphTimeSinceStart); // pages per minute
long webgraphRemainingTime = webgraphSpeed == 0 ? 0 : 60000 * webgraphRemainingCount / webgraphSpeed; // millis long webgraphRemainingTime = webgraphSpeed == 0 ? 0 : 60000 * webgraphRemainingCount / webgraphSpeed; // millis

@ -105,6 +105,7 @@ public class ASCII implements Comparator<String> {
} }
public final static String String(final byte[] bytes) { public final static String String(final byte[] bytes) {
if (bytes == null) return null;
StringBuilder sb = new StringBuilder(bytes.length); StringBuilder sb = new StringBuilder(bytes.length);
for (byte b : bytes) { for (byte b : bytes) {
if (b < 0) throw new IllegalArgumentException(); if (b < 0) throw new IllegalArgumentException();

@ -63,18 +63,20 @@ public abstract class AbstractSolrConnector implements SolrConnector {
public final static SolrDocument POISON_DOCUMENT = new SolrDocument(); public final static SolrDocument POISON_DOCUMENT = new SolrDocument();
public final static String POISON_ID = "POISON_ID"; public final static String POISON_ID = "POISON_ID";
public final static String CATCHALL_TERM = "*:*"; public final static String CATCHALL_TERM = "[* TO *]";
public final static String CATCHALL_DTERM = ":" + CATCHALL_TERM;
public final static String CATCHALL_QUERY = "*:*";
public final static SolrQuery catchallQuery = new SolrQuery(); public final static SolrQuery catchallQuery = new SolrQuery();
static { static {
catchallQuery.setQuery(CATCHALL_TERM); catchallQuery.setQuery(CATCHALL_QUERY);
catchallQuery.setFields(CollectionSchema.id.getSolrFieldName()); catchallQuery.setFields(CollectionSchema.id.getSolrFieldName());
catchallQuery.setRows(0); catchallQuery.setRows(0);
catchallQuery.setStart(0); catchallQuery.setStart(0);
} }
public final static SolrQuery catchSuccessQuery = new SolrQuery(); public final static SolrQuery catchSuccessQuery = new SolrQuery();
static { static {
//catchSuccessQuery.setQuery("-" + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); //catchSuccessQuery.setQuery("-" + CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
catchSuccessQuery.setQuery(CATCHALL_TERM); // failreason_s is only available for core collection1 catchSuccessQuery.setQuery(CATCHALL_QUERY); // failreason_s is only available for core collection1
catchSuccessQuery.setFields(CollectionSchema.id.getSolrFieldName()); catchSuccessQuery.setFields(CollectionSchema.id.getSolrFieldName());
catchSuccessQuery.clearSorts(); catchSuccessQuery.clearSorts();
catchSuccessQuery.setIncludeScore(false); catchSuccessQuery.setIncludeScore(false);
@ -200,7 +202,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override @Override
public Iterator<String> iterator() { public Iterator<String> iterator() {
final BlockingQueue<String> queue = concurrentIDsByQuery(CATCHALL_TERM, 0, Integer.MAX_VALUE, 60000); final BlockingQueue<String> queue = concurrentIDsByQuery(CATCHALL_QUERY, 0, Integer.MAX_VALUE, 60000);
return new LookAheadIterator<String>() { return new LookAheadIterator<String>() {
@Override @Override
protected String next0() { protected String next0() {

@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
@ -100,26 +101,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} }
} }
} }
/**
 * Get the number of segments, read from the "segmentCount" entry of the
 * index info returned by a luke request.
 * @return the number of segments; 1 if the index info has no "segmentCount"
 *         entry; 0 if there is no server, no index info, or the request failed
 */
public int getSegmentCount() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
// NOTE(review): lukeResponse is dereferenced without a null check here, while getSize() does check for null; a null would end up in the catch block below
NamedList<Object> info = lukeResponse.getIndexInfo();
if (info == null) return 0;
Integer segmentCount = (Integer) info.get("segmentCount");
// a missing entry is reported as a single segment
if (segmentCount == null) return 1;
return segmentCount.intValue();
} catch (final Throwable e) {
clearCaches(); // prevent further OOM if this was caused by OOM
log.warn(e);
return 0;
}
}
@Override @Override
public boolean isClosed() { public boolean isClosed() {
@ -144,22 +125,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} }
} }
/**
 * Get the number of documents in the index, taken from the numDocs value
 * of a luke request.
 * @return the document count, or 0 if there is no server, no luke response,
 *         no numDocs value, or the request failed
 */
@Override
public long getSize() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
if (lukeResponse == null) return 0;
Integer numDocs = lukeResponse.getNumDocs();
if (numDocs == null) return 0;
return numDocs.longValue();
} catch (final Throwable e) {
// any failure is logged and reported as size 0
clearCaches(); // prevent further OOM if this was caused by OOM
log.warn(e);
return 0;
}
}
/** /**
* delete everything in the solr index * delete everything in the solr index
* @throws IOException * @throws IOException
@ -169,7 +134,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server == null) return; if (this.server == null) return;
synchronized (this.server) { synchronized (this.server) {
try { try {
this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_TERM); this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_QUERY);
this.server.commit(true, true, false); this.server.commit(true, true, false);
} catch (final Throwable e) { } catch (final Throwable e) {
clearCaches(); // prevent further OOM if this was caused by OOM clearCaches(); // prevent further OOM if this was caused by OOM
@ -345,10 +310,83 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} }
} }
// luke requests: these do not work for attached SolrCloud Server
public Collection<FieldInfo> getFields() throws SolrServerException { public Collection<FieldInfo> getFields() throws SolrServerException {
// get all fields contained in index // get all fields contained in index
return getIndexBrowser(false).getFieldInfo().values(); return getIndexBrowser(false).getFieldInfo().values();
} }
/**
 * Get the number of segments, read from the "segmentCount" entry of the
 * index info returned by a luke request.
 * @return the number of segments; 1 if the index info has no "segmentCount"
 *         entry; 0 if unknown (no server, no luke response, no index info,
 *         or the request failed)
 */
public int getSegmentCount() {
    if (this.server == null) return 0;
    try {
        final LukeResponse lukeResponse = getIndexBrowser(false);
        // a missing response means "unknown"; handle it explicitly instead of
        // running into a NullPointerException that would clear the caches and log a warning
        if (lukeResponse == null) return 0;
        final NamedList<Object> info = lukeResponse.getIndexInfo();
        if (info == null) return 0;
        final Integer segmentCount = (Integer) info.get("segmentCount");
        // a missing entry is reported as a single segment
        if (segmentCount == null) return 1;
        return segmentCount.intValue();
    } catch (final Throwable e) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(e);
        return 0;
    }
}
private int useluke = 0; // 3-value logic: 1=yes, -1=no, 0=dontknow

/**
 * Get the number of documents in the index.
 * If the server is a ServerShard, the luke request and a normal query
 * request are compared once: when both report the same positive count the
 * luke request is used from then on, when they differ the query request is
 * used from then on. For all other servers the luke request is used.
 * @return the document count, or 0 if there is no server or the size
 *         could not be determined
 */
@Override
public long getSize() {
    if (this.server == null) return 0;
    if (!(this.server instanceof ServerShard)) return getSizeLukeRequest();

    // the server can be a single shard; we don't know here
    // to test that, we submit requests to both variants
    if (this.useluke == 1) return getSizeLukeRequest();
    if (this.useluke == -1) return getSizeQueryRequest();
    final long ls = getSizeLukeRequest();
    final long qs = getSizeQueryRequest();
    if (ls != qs) {
        // luke does not see the same index as a normal query: never use it again
        this.useluke = -1;
        return qs;
    }
    // both request variants return 0 for an empty index and for a failed request,
    // so two zeros prove nothing; decide only once matching positive counts are seen
    if (ls > 0) this.useluke = 1;
    return ls;
}
/**
 * Determine the index size with a normal query request, using the shared
 * catchSuccessQuery and reading the number of found documents.
 * @return the number of found documents, or 0 if there is no server,
 *         no response, no result list, or the request failed
 */
private long getSizeQueryRequest() {
    if (this.server == null) {
        return 0;
    }
    try {
        final QueryResponse response = getResponseByParams(AbstractSolrConnector.catchSuccessQuery);
        final SolrDocumentList hits = (response == null) ? null : response.getResults();
        return (hits == null) ? 0 : hits.getNumFound();
    } catch (final Throwable e) {
        // failures are logged and reported as size 0
        log.warn(e);
        return 0;
    }
}
/**
 * Determine the index size with a luke request, reading its numDocs value.
 * @return the document count, or 0 if there is no server, no luke response,
 *         no numDocs value, or the request failed
 */
private long getSizeLukeRequest() {
    if (this.server == null) {
        return 0;
    }
    try {
        final LukeResponse luke = getIndexBrowser(false);
        final Integer docCount = (luke == null) ? null : luke.getNumDocs();
        return (docCount == null) ? 0 : docCount.longValue();
    } catch (final Throwable e) {
        clearCaches(); // prevent further OOM if this was caused by OOM
        log.warn(e);
        return 0;
    }
}
private LukeResponse getIndexBrowser(final boolean showSchema) throws SolrServerException { private LukeResponse getIndexBrowser(final boolean showSchema) throws SolrServerException {
// get all fields contained in index // get all fields contained in index
@ -356,33 +394,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
lukeRequest.setResponseParser(new XMLResponseParser()); lukeRequest.setResponseParser(new XMLResponseParser());
lukeRequest.setNumTerms(0); lukeRequest.setNumTerms(0);
lukeRequest.setShowSchema(showSchema); lukeRequest.setShowSchema(showSchema);
/*
final SolrRequest lukeRequest = new SolrRequest(METHOD.GET, "/admin/luke") {
private static final long serialVersionUID = 1L;
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return null;
}
@Override
public SolrParams getParams() {
ModifiableSolrParams params = new ModifiableSolrParams();
//params.add("numTerms", "1");
params.add("_", "" + System.currentTimeMillis()); // cheat a proxy
if (showSchema) params.add("show", "schema");
return params;
}
@Override
public LukeResponse process(SolrServer server) throws SolrServerException, IOException {
long startTime = System.currentTimeMillis();
LukeResponse res = new LukeResponse();
this.setResponseParser(new XMLResponseParser());
NamedList<Object> response = server.request(this);
res.setResponse(response);
res.setElapsedTime(System.currentTimeMillis() - startTime);
return res;
}
};
*/
LukeResponse lukeResponse = null; LukeResponse lukeResponse = null;
try { try {
lukeResponse = lukeRequest.process(this.server); lukeResponse = lukeRequest.process(this.server);

@ -81,14 +81,14 @@ public class ResponseAccumulator {
for (Map.Entry<String, Object> e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue()); for (Map.Entry<String, Object> e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue());
} }
// accumulate the index (thats the result from a luke request) // accumulate the index
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
SimpleOrderedMap<Object> index_counts = (SimpleOrderedMap<Object>) response.get("index"); SimpleOrderedMap<Object> index_counts = (SimpleOrderedMap<Object>) response.get("index");
if (index_counts != null) { if (index_counts != null) {
for (Map.Entry<String, Object> e: index_counts) index_countsAcc.add(e.getKey(), e.getValue()); for (Map.Entry<String, Object> e: index_counts) index_countsAcc.add(e.getKey(), e.getValue());
} }
// accumulate the fields (thats the result from a luke request) // accumulate the fields
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
SimpleOrderedMap<Object> schema = (SimpleOrderedMap<Object>) response.get("schema"); SimpleOrderedMap<Object> schema = (SimpleOrderedMap<Object>) response.get("schema");
if (schema != null) { if (schema != null) {

@ -102,6 +102,7 @@ import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaConfiguration; import net.yacy.cora.federate.solr.SchemaConfiguration;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.instance.RemoteInstance; import net.yacy.cora.federate.solr.instance.RemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
@ -2311,17 +2312,17 @@ public final class Switchboard extends serverSwitch {
// run postprocessing on these profiles // run postprocessing on these profiles
postprocessingRunning = true; postprocessingRunning = true;
postprocessingStartTime[0] = System.currentTimeMillis(); postprocessingStartTime[0] = System.currentTimeMillis();
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, profileHash); for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, profileHash);
postprocessingStartTime[0] = 0; postprocessingStartTime[0] = 0;
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
if (processWebgraph) { if (processWebgraph) {
postprocessingStartTime[1] = System.currentTimeMillis(); postprocessingStartTime[1] = System.currentTimeMillis();
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
for (String profileHash: deletionCandidates) proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, profileHash); for (String profileHash: deletionCandidates) proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, profileHash);
postprocessingStartTime[1] = 0; postprocessingStartTime[1] = 0;
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
} }
this.crawler.cleanProfiles(deletionCandidates); this.crawler.cleanProfiles(deletionCandidates);
log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents"); log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents");
@ -2329,17 +2330,17 @@ public final class Switchboard extends serverSwitch {
// run postprocessing on all profiles // run postprocessing on all profiles
postprocessingRunning = true; postprocessingRunning = true;
postprocessingStartTime[0] = System.currentTimeMillis(); postprocessingStartTime[0] = System.currentTimeMillis();
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, null); proccount += collection1Configuration.postprocessing(index, rrCache, clickdepthCache, null);
postprocessingStartTime[0] = 0; postprocessingStartTime[0] = 0;
try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} // should be zero but you never know try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
if (processWebgraph) { if (processWebgraph) {
postprocessingStartTime[1] = System.currentTimeMillis(); postprocessingStartTime[1] = System.currentTimeMillis();
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, null); proccount += webgraphConfiguration.postprocessing(index, clickdepthCache, null);
postprocessingStartTime[1] = 0; postprocessingStartTime[1] = 0;
try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} try {postprocessingCount[1] = (int) fulltext.getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
} }
this.crawler.cleanProfiles(this.crawler.getActiveProfiles()); this.crawler.cleanProfiles(this.crawler.getActiveProfiles());
log.info("cleanup post-processed " + proccount + " documents"); log.info("cleanup post-processed " + proccount + " documents");

@ -38,6 +38,7 @@ import org.apache.solr.common.params.CommonParams;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.CrawlProfile;
import net.yacy.search.index.Fulltext; import net.yacy.search.index.Fulltext;
@ -65,7 +66,7 @@ public class ErrorCache {
params.setFacet(false); params.setFacet(false);
params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc)); params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
params.setFields(CollectionSchema.id.getSolrFieldName()); params.setFields(CollectionSchema.id.getSolrFieldName());
params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given params.set(CommonParams.DF, CollectionSchema.id.getSolrFieldName()); // DisMaxParams.QF or CommonParams.DF must be given
SolrDocumentList docList = fulltext.getDefaultConnector().getDocumentListByParams(params); SolrDocumentList docList = fulltext.getDefaultConnector().getDocumentListByParams(params);
if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) { if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) {
@ -79,7 +80,7 @@ public class ErrorCache {
public void clear() throws IOException { public void clear() throws IOException {
if (this.cache != null) synchronized (this.cache) {this.cache.clear();} if (this.cache != null) synchronized (this.cache) {this.cache.clear();}
this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
} }
public void removeHosts(final Set<String> hosthashes) { public void removeHosts(final Set<String> hosthashes) {
@ -166,7 +167,7 @@ public class ErrorCache {
} }
if (failDoc != null) return failDoc; if (failDoc != null) return failDoc;
try { try {
final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + ":[* TO *]", 0, 1); final SolrDocumentList docs = this.fulltext.getDefaultConnector().getDocumentListByQuery(CollectionSchema.id + ":\"" + urlhash + "\" AND " + CollectionSchema.failtype_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM, 0, 1);
if (docs == null || docs.isEmpty()) return null; if (docs == null || docs.isEmpty()) return null;
SolrDocument doc = docs.get(0); SolrDocument doc = docs.get(0);
if (doc == null) return null; if (doc == null) return null;

@ -376,7 +376,7 @@ public final class Fulltext {
* @param hosthashes * @param hosthashes
*/ */
public void deleteDomainErrors(final Set<String> hosthashes) { public void deleteDomainErrors(final Set<String> hosthashes) {
deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); deleteDomainWithConstraint(this.getDefaultConnector(), CollectionSchema.host_id_s.getSolrFieldName(), hosthashes, CollectionSchema.failreason_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
} }
private static void deleteDomainWithConstraint(SolrConnector connector, String fieldname, final Set<String> hosthashes, String constraintQuery) { private static void deleteDomainWithConstraint(SolrConnector connector, String fieldname, final Set<String> hosthashes, String constraintQuery) {

@ -20,13 +20,18 @@ package net.yacy.search.index;
*/ */
import java.io.IOException; import java.io.IOException;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.workflow.AbstractBusyThread; import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
@ -92,7 +97,7 @@ import org.apache.solr.common.SolrInputDocument;
*/ */
public void addSelectFieldname(String field) { public void addSelectFieldname(String field) {
if (field != null && !field.isEmpty()) { if (field != null && !field.isEmpty()) {
querylist.add(field + ":[* TO *]"); querylist.add(field + AbstractSolrConnector.CATCHALL_DTERM);
} }
} }

@ -485,7 +485,7 @@ public class Segment {
final BlockingQueue<SolrDocument> docQueue; final BlockingQueue<SolrDocument> docQueue;
final String urlstub; final String urlstub;
if (stub == null) { if (stub == null) {
docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_TERM, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); docQueue = this.fulltext.getDefaultConnector().concurrentDocumentsByQuery(AbstractSolrConnector.CATCHALL_QUERY, 0, Integer.MAX_VALUE, maxtime, maxcount, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
urlstub = null; urlstub = null;
} else { } else {
final String host = stub.getHost(); final String host = stub.getHost();

@ -36,6 +36,7 @@ import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration; import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType; import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.order.NaturalOrder; import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.document.parser.html.AbstractScraper; import net.yacy.document.parser.html.AbstractScraper;
@ -354,7 +355,7 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls // add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND ("); q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR "); q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM + " OR ");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR ");
q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))"); q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");

@ -924,7 +924,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// collect hosts from index which shall take part in citation computation // collect hosts from index which shall take part in citation computation
String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
ReversibleScoreMap<String> hostscore; ReversibleScoreMap<String> hostscore;
try { try {
Map<String, ReversibleScoreMap<String>> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()); Map<String, ReversibleScoreMap<String>> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName());
@ -950,7 +950,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// This shall fulfill the following requirement: // This shall fulfill the following requirement:
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C. // If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links // To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]"; String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
long patchquerycount = collectionConnector.getCountByQuery(patchquery); long patchquerycount = collectionConnector.getCountByQuery(patchquery);
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 600000, 100, BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 600000, 100,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName()); CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
@ -1065,7 +1065,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// process all documents in collection // process all documents in collection
query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id
Set<String> uniqueURLs = new HashSet<String>(); Set<String> uniqueURLs = new HashSet<String>();
try { try {
@ -1311,7 +1311,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
ncr += d[0] / ilc; ncr += d[0] / ilc;
} else { } else {
// Output a warning that d[] is empty // Output a warning that d[] is empty
ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + iid); ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + ASCII.String(iid));
break; break;
} }
} }

@ -315,7 +315,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
// that means we must search for those entries. // that means we must search for those entries.
webgraphConnector.commit(true); // make sure that we have latest information that can be found webgraphConnector.commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10); //BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]"; String query = (harvestkey == null || !this.contains(WebgraphSchema.harvestkey_s) ? "" : WebgraphSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 100); BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 1800000, 100);
SolrDocument doc; SolrDocument doc;

Loading…
Cancel
Save