better/fewer requests to local solr; the request is made in chunks which

are exactly the size needed to present the current
search result page. This also causes the next solr requests to be made
automatically when switching to the next pages.
pull/1/head
orbiter 12 years ago
parent a734fbc4a5
commit 9c09fd7d0b

@ -127,7 +127,8 @@ public class RemoteSearch extends Thread {
public static void primaryRemoteSearches(
final SearchEvent event,
final int count, final long time,
final int start, final int count,
final long time,
final Blacklist blacklist,
final SortedMap<byte[], String> clusterselection,
final int burstRobinsonPercent,
@ -167,7 +168,7 @@ public class RemoteSearch extends Thread {
// start solr searches
for (Seed s: nodePeers) {
solrRemoteSearch(event, count, s, blacklist);
solrRemoteSearch(event, start, count, s, blacklist);
}
// start search to YaCy DHT peers
@ -252,6 +253,7 @@ public class RemoteSearch extends Thread {
public static Thread solrRemoteSearch(
final SearchEvent event,
final int start,
final int count,
final Seed targetPeer,
final Blacklist blacklist) {
@ -263,17 +265,14 @@ public class RemoteSearch extends Thread {
Thread solr = new Thread() {
@Override
public void run() {
int tmpoffset = 0;
int tmpcount = 10;
while (tmpoffset + tmpcount <= count && tmpcount > 0) {
int urls = 0;
try {
event.oneFeederStarted();
urls = Protocol.solrQuery(
event,
tmpoffset,
tmpcount,
tmpoffset == 0,
start,
count,
start == 0,
targetPeer,
blacklist);
if (urls >= 0) {
@ -290,10 +289,6 @@ public class RemoteSearch extends Thread {
} finally {
event.oneFeederTerminated();
}
if (urls < tmpcount) break; // there won't be more
tmpoffset += tmpcount;
tmpcount = targetPeer == null ? 10 : count - tmpoffset;
}
}
};
/*if (targetPeer == null) solr.run(); else*/ solr.start();

@ -123,7 +123,9 @@ public final class SearchEvent {
private final SortedMap<byte[], String> IAResults;
private final SortedMap<byte[], HeuristicResult> heuristics;
private byte[] IAmaxcounthash, IAneardhthash;
public Thread rwiProcess, localsearch;
public Thread rwiProcess;
private Thread localsolrsearch;
private int localsolroffset;
private final AtomicInteger expectedRemoteReferences, maxExpectedRemoteReferences; // counter for referenced that had been sorted out for other reasons
public final ScoreMap<String> hostNavigator; // a counter for the appearance of host names
public final ScoreMap<String> authorNavigator; // a counter for the appearances of authors
@ -135,7 +137,7 @@ public final class SearchEvent {
private final LoaderDispatcher loader;
private final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
private final boolean deleteIfSnippetFail;
private long urlRetrievalAllTime;
private long urlRetrievalAllTime;
private long snippetComputationAllTime;
private ConcurrentHashMap<String, String> snippets;
private final boolean remote;
@ -159,15 +161,16 @@ public final class SearchEvent {
public final AtomicInteger local_rwi_stored; // the number of existing hits by the local search in rwi index
public final AtomicInteger remote_rwi_available; // the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_rwi_stored; // the number of existing hits at remote site
public final AtomicInteger remote_rwi_peerCount; // the number of peers which contributed to the remote search result
public final AtomicInteger remote_rwi_peerCount; // the number of peers which contributed to the remote search result
public final AtomicInteger local_solr_available; // the number of hits generated/ranked by the local search in solr
public final AtomicInteger local_solr_stored; // the number of existing hits by the local search in solr
public final AtomicInteger remote_solr_available;// the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_solr_stored; // the number of existing hits at remote site
public final AtomicInteger remote_solr_peerCount;// the number of peers which contributed to the remote search result
public final AtomicInteger remote_solr_peerCount;// the number of peers which contributed to the remote search result
public int getResultCount() {
return this.local_rwi_available.get() + local_solr_stored.get();
return this.local_rwi_available.get() + this.remote_rwi_available.get() +
this.remote_solr_available.get() + this.local_solr_stored.get();
}
protected SearchEvent(
@ -252,8 +255,9 @@ public final class SearchEvent {
}
// start a local solr search
this.localsearch = RemoteSearch.solrRemoteSearch(this, 100, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, 0, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolroffset = this.query.itemsPerPage;
// start a local RWI search concurrently
this.rwiProcess = null;
if (query.getSegment().connectedRWI() && (!this.remote || this.peers.mySeed().getBirthdate() < noRobinsonLocalRWISearch)) {
@ -278,7 +282,7 @@ public final class SearchEvent {
Thread.currentThread().setName("SearchEvent.primaryRemoteSearches");
RemoteSearch.primaryRemoteSearches(
SearchEvent.this,
remote_maxcount,
0, remote_maxcount,
remote_maxtime,
Switchboard.urlBlacklist,
(SearchEvent.this.query.domType == QueryParams.Searchdom.GLOBAL) ? null : preselectedPeerHashes,
@ -486,8 +490,11 @@ public final class SearchEvent {
assert (iEntry.urlhash().length == index.row().primaryKeyLength);
// doublecheck for urls
if (this.urlhashes.has(iEntry.urlhash())) continue pollloop;
if (this.urlhashes.has(iEntry.urlhash())) {
if (log.isFine()) log.logFine("dropped RWI: doublecheck");
continue pollloop;
}
// increase flag counts
Bitfield flags = iEntry.flags();
for (int j = 0; j < 32; j++) {
@ -495,7 +502,10 @@ public final class SearchEvent {
}
// check constraints
if (!this.testFlags(flags)) continue pollloop;
if (!this.testFlags(flags)) {
if (log.isFine()) log.logFine("dropped RWI: flag test failed");
continue pollloop;
}
// check document domain
if (this.query.contentdom.getCode() > 0 &&
@ -503,6 +513,7 @@ public final class SearchEvent {
(this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Condenser.flag_cat_hasvideo))) ||
(this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Condenser.flag_cat_hasimage))) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Condenser.flag_cat_hasapp))))) {
if (log.isFine()) log.logFine("dropped RWI: contentdom fail");
continue pollloop;
}
@ -512,10 +523,16 @@ public final class SearchEvent {
// check site constraints
final String hosthash = iEntry.hosthash();
if ( this.query.modifier.sitehash == null ) {
if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) continue pollloop;
if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) {
if (log.isFine()) log.logFine("dropped RWI: siteexcludes");
continue pollloop;
}
} else {
// filter out all domains that do not match with the site constraint
if (!hosthash.equals(this.query.modifier.sitehash)) continue pollloop;
if (!hosthash.equals(this.query.modifier.sitehash)) {
if (log.isFine()) log.logFine("dropped RWI: modifier.sitehash");
continue pollloop;
}
}
// finally extend the double-check and insert result to stack
@ -526,6 +543,7 @@ public final class SearchEvent {
break rankingtryloop;
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
if (log.isFine()) log.logFine("dropped RWI: arithmetic exception");
continue rankingtryloop;
}
}
@ -735,12 +753,14 @@ public final class SearchEvent {
if ( !this.query.urlMask_isCatchall ) {
// check url mask
if (!iEntry.matches(this.query.urlMask)) {
if (log.isFine()) log.logFine("dropped Node: url mask does not match");
continue pollloop;
}
}
// doublecheck for urls
if (this.urlhashes.has(iEntry.hash())) {
if (log.isFine()) log.logFine("dropped Node: double check");
continue pollloop;
}
@ -751,7 +771,10 @@ public final class SearchEvent {
// check constraints
Bitfield flags = iEntry.flags();
if (!this.testFlags(flags)) continue pollloop;
if (!this.testFlags(flags)) {
if (log.isFine()) log.logFine("dropped Node: flag test");
continue pollloop;
}
// check document domain
if (this.query.contentdom.getCode() > 0 &&
@ -759,6 +782,7 @@ public final class SearchEvent {
(this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Condenser.flag_cat_hasvideo))) ||
(this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Condenser.flag_cat_hasimage))) ||
(this.query.contentdom == ContentDomain.APP && !(flags.get(Condenser.flag_cat_hasapp))))) {
if (log.isFine()) log.logFine("dropped Node: content domain does not match");
continue pollloop;
}
@ -766,11 +790,15 @@ public final class SearchEvent {
final String hosthash = iEntry.hosthash();
if ( this.query.modifier.sitehash == null ) {
if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) {
if (log.isFine()) log.logFine("dropped Node: siteexclude");
continue pollloop;
}
} else {
// filter out all domains that do not match with the site constraint
if (iEntry.url().getHost().indexOf(this.query.modifier.sitehost) < 0) continue pollloop;
if (iEntry.url().getHost().indexOf(this.query.modifier.sitehost) < 0) {
if (log.isFine()) log.logFine("dropped Node: sitehost");
continue pollloop;
}
}
// finally extend the double-check and insert result to stack
@ -1047,14 +1075,21 @@ public final class SearchEvent {
return null;
}
public void drainStacksToResult() {
public boolean drainStacksToResult() {
// we take one entry from both stacks at the same time
boolean success = false;
Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
URIMetadataNode localEntry = localEntryElement == null ? null : localEntryElement.getElement();
if (localEntry != null) addResult(getSnippet(localEntry, null));
if (localEntry != null) {
addResult(getSnippet(localEntry, null));
success = true;
}
if (localEntry == null) {
URIMetadataNode p2pEntry = pullOneFilteredFromRWI(true);
if (p2pEntry != null) addResult(getSnippet(p2pEntry, null));
if (p2pEntry != null) {
addResult(getSnippet(p2pEntry, null));
success = true;
}
} else {
new Thread() {
public void run() {
@ -1063,6 +1098,7 @@ public final class SearchEvent {
}
}.start();
}
return success;
}
/**
@ -1188,33 +1224,23 @@ public final class SearchEvent {
final long finishTime = System.currentTimeMillis() + timeout;
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "started, item = " + item + ", available = " + this.getResultCount(), 0, 0), false);
// check if we have a success
if (this.resultList.sizeAvailable() > item) {
// we have the wanted result already in the result array .. return that
final ResultEntry re = this.resultList.element(item).getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "prefetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false);
return re;
// wait until a local solr is finished, we must do that to be able to check if we need more
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {
try {this.localsolrsearch.join();} catch (InterruptedException e) {}
}
// we must wait some time until the first result page is full to get enough elements for ranking
/*
if (this.remote && item < 10 && !this.feedingIsFinished()) {
// the first 10 results have a very special timing to get most of the remote results ordered
// before they are presented on the first lines .. yes sleeps seem to be bad. but how shall we predict how long other
// peers will take until they respond?
long stoptime = System.currentTimeMillis() + Math.min(timeout, item == 0 ? 100 : (10 - item) * 9); // the first result takes the longest time
while (System.currentTimeMillis() < stoptime) {
//drainStacksToResult();
try { Thread.sleep(10); } catch (final InterruptedException e) { Log.logException(e); }
}
this.localsolrsearch = null;
if (item >= this.localsolroffset && this.local_solr_stored.get() >= item) {
// load remaining solr results now
int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.localsolroffset, nextitems, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolroffset += nextitems;
}
*/
// now do this as long as needed
while ((!this.feedingIsFinished() || this.rwiQueueSize() > 0 || this.nodeStack.sizeQueue() > 0) &&
this.resultList.sizeAvailable() < item + 1 && System.currentTimeMillis() < finishTime) {
drainStacksToResult();
try { Thread.sleep(10); } catch (final InterruptedException e) { Log.logException(e); }
// now pull results as long as needed and as long as possible
while ( this.resultList.sizeAvailable() <= item &&
(this.rwiQueueSize() > 0 || this.nodeStack.sizeQueue() > 0 ||
(!this.feedingIsFinished() && System.currentTimeMillis() < finishTime))) {
if (!drainStacksToResult()) try {Thread.sleep(10);} catch (final InterruptedException e) {Log.logException(e);}
}
// check if we have a success
@ -1222,6 +1248,12 @@ public final class SearchEvent {
// we have the wanted result already in the result array .. return that
final ResultEntry re = this.resultList.element(item).getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false);
if (this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) {
// at the end of a list, trigger a next solr search
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, Switchboard.urlBlacklist);
this.localsolroffset += this.query.itemsPerPage;
}
return re;
}

Loading…
Cancel
Save