fix for preparation of search result pages with offset > 10:

- fewer pages are fetched in advance
- just-in-time fetch of the next required pages
- fix for missing hand-over of offset to fetch threads

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6279 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 39a311d608
commit ead48c4b25

@ -169,12 +169,12 @@ public class AccessTracker_p {
prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName()); prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes)); prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes));
} }
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage); prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage);
prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount); prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime); prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime); prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime); prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
qcountSum += searchProfile.linesPerPage; qcountSum += searchProfile.itemsPerPage;
rcountSum += searchProfile.resultcount; rcountSum += searchProfile.resultcount;
utimeSum += searchProfile.urlretrievaltime; utimeSum += searchProfile.urlretrievaltime;
stimeSum += searchProfile.snippetcomputationtime; stimeSum += searchProfile.snippetcomputationtime;

@ -64,7 +64,7 @@ public final class QueryParams {
public String queryString; public String queryString;
public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes; public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes;
public int linesPerPage, offset; public int itemsPerPage, offset;
public String prefer; public String prefer;
public int contentdom; public int contentdom;
public String urlMask; public String urlMask;
@ -90,7 +90,7 @@ public final class QueryParams {
public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options
public QueryParams(final String queryString, public QueryParams(final String queryString,
final int lines, final int itemsPerPage,
final RankingProfile ranking, final RankingProfile ranking,
final Bitfield constraint) { final Bitfield constraint) {
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) { if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
@ -110,7 +110,7 @@ public final class QueryParams {
this.maxDistance = Integer.MAX_VALUE; this.maxDistance = Integer.MAX_VALUE;
this.prefer = ""; this.prefer = "";
this.contentdom = CONTENTDOM_ALL; this.contentdom = CONTENTDOM_ALL;
this.linesPerPage = lines; this.itemsPerPage = itemsPerPage;
this.offset = 0; this.offset = 0;
this.urlMask = ".*"; this.urlMask = ".*";
this.targetlang = "en"; this.targetlang = "en";
@ -139,7 +139,7 @@ public final class QueryParams {
final String language, final String language,
final String navigators, final String navigators,
final boolean onlineSnippetFetch, final boolean onlineSnippetFetch,
final int lines, final int offset, final String urlMask, final int itemsPerPage, final int offset, final String urlMask,
final int domType, final int domMaxTargets, final int domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint, final Bitfield constraint, final boolean allofconstraint,
final String site, final String site,
@ -156,7 +156,7 @@ public final class QueryParams {
this.maxDistance = maxDistance; this.maxDistance = maxDistance;
this.prefer = prefer; this.prefer = prefer;
this.contentdom = contentdom; this.contentdom = contentdom;
this.linesPerPage = Math.min((specialRights) ? 1000 : 50, lines); this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage);
this.offset = Math.min((specialRights) ? 10000 : 100, offset); this.offset = Math.min((specialRights) ? 10000 : 100, offset);
this.urlMask = urlMask; this.urlMask = urlMask;
assert language != null; assert language != null;
@ -178,12 +178,12 @@ public final class QueryParams {
public int neededResults() { public int neededResults() {
// the number of result lines that must be computed // the number of result lines that must be computed
return this.offset + this.linesPerPage; return this.offset + this.itemsPerPage;
} }
public int displayResults() { public int displayResults() {
// the number of result lines that are displayed at once (size of result page) // the number of result lines that are displayed at once (size of result page)
return this.linesPerPage; return this.itemsPerPage;
} }
public void setOffset(final int newOffset) { public void setOffset(final int newOffset) {

@ -50,7 +50,7 @@ public class ResultFetcher {
// input values // input values
final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
final QueryParams query; QueryParams query;
private final Segment indexSegment; private final Segment indexSegment;
private final yacySeedDB peers; private final yacySeedDB peers;
@ -91,23 +91,18 @@ public class ResultFetcher {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1]; this.workerThreads = null;
for (int i = 0; i < this.workerThreads.length; i++) { deployWorker(10);
this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0);
this.workerThreads[i].start();
}
serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false); serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false);
} }
public void restartWorker() { public void deployWorker(int neededResults) {
if (anyWorkerAlive()) return; if (anyWorkerAlive()) return;
this.workerThreads = new Worker[workerThreadCount]; this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
Worker worker; for (int i = 0; i < workerThreads.length; i++) {
for (int i = 0; i < workerThreads.length; i++) { this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0, neededResults);
worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0); this.workerThreads[i].start();
worker.start();
workerThreads[i] = worker;
} }
} }
@ -136,12 +131,14 @@ public class ResultFetcher {
private long lastLifeSign; // when the last time the run()-loop was executed private long lastLifeSign; // when the last time the run()-loop was executed
private final int id; private final int id;
private int snippetMode; private int snippetMode;
private int neededResults;
public Worker(final int id, final long maxlifetime, int snippetMode) { public Worker(final int id, final long maxlifetime, int snippetMode, int neededResults) {
this.id = id; this.id = id;
this.snippetMode = snippetMode; this.snippetMode = snippetMode;
this.lastLifeSign = System.currentTimeMillis(); this.lastLifeSign = System.currentTimeMillis();
this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
this.neededResults = neededResults;
} }
public void run() { public void run() {
@ -152,6 +149,7 @@ public class ResultFetcher {
boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0; boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
try { try {
while (System.currentTimeMillis() < this.timeout) { while (System.currentTimeMillis() < this.timeout) {
if (result.size() >= neededResults) break;
this.lastLifeSign = System.currentTimeMillis(); this.lastLifeSign = System.currentTimeMillis();
// check if we have enough // check if we have enough
@ -285,10 +283,24 @@ public class ResultFetcher {
return this.result.element(item).element; return this.result.element(item).element;
} }
System.out.println("rankedCache.size() = " + this.rankedCache.size());
System.out.println("result.size() = " + this.result.size());
System.out.println("query.neededResults() = " + query.neededResults());
if ((!anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) ||
(this.result.size() < query.neededResults())) &&
//(event.query.onlineSnippetFetch) &&
(this.rankedCache.size() > this.result.size())
) {
// start worker threads to fetch urls and snippets
deployWorker(query.neededResults());
}
// finally wait until enough results are there produced from the // finally wait until enough results are there produced from the
// snippet fetch process // snippet fetch process
while ((anyWorkerAlive()) && (result.size() <= item)) { while ((anyWorkerAlive()) && (result.size() <= item)) {
try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {} try {Thread.sleep((item % query.itemsPerPage) * 50L);} catch (final InterruptedException e) {}
} }
// finally, if there is something, return the result // finally, if there is something, return the result

@ -66,7 +66,7 @@ public final class SearchEvent {
private final Segment indexSegment; private final Segment indexSegment;
private final yacySeedDB peers; private final yacySeedDB peers;
private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
private ResultFetcher snippets; private ResultFetcher results;
// class variables for search abstracts // class variables for search abstracts
private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
@ -176,7 +176,7 @@ public final class SearchEvent {
} }
// start worker threads to fetch urls and snippets // start worker threads to fetch urls and snippets
this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers); this.results = new ResultFetcher(rankedCache, query, indexSegment, peers);
// clean up events // clean up events
SearchEventCache.cleanupEvents(false); SearchEventCache.cleanupEvents(false);
@ -201,11 +201,12 @@ public final class SearchEvent {
public void setQuery(QueryParams query) { public void setQuery(QueryParams query) {
this.query = query; this.query = query;
this.results.query = query;
} }
public void cleanup() { public void cleanup() {
// execute deletion of failed words // execute deletion of failed words
int rw = this.snippets.failedURLs.size(); int rw = this.results.failedURLs.size();
if (rw > 0) { if (rw > 0) {
final TreeSet<byte[]> removeWords = query.queryHashes; final TreeSet<byte[]> removeWords = query.queryHashes;
removeWords.addAll(query.excludeHashes); removeWords.addAll(query.excludeHashes);
@ -213,7 +214,7 @@ public final class SearchEvent {
final Iterator<byte[]> j = removeWords.iterator(); final Iterator<byte[]> j = removeWords.iterator();
// remove the same url hashes for multiple words // remove the same url hashes for multiple words
while (j.hasNext()) { while (j.hasNext()) {
this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet()); this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet());
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
@ -311,16 +312,8 @@ public final class SearchEvent {
// remote search requests, wait that the local process terminates first // remote search requests, wait that the local process terminates first
try {localSearchThread.join();} catch (InterruptedException e) {} try {localSearchThread.join();} catch (InterruptedException e) {}
} }
// now wait until as many remote worker threads have finished, as we
// want to display results
while (this.primarySearchThreads != null &&
this.primarySearchThreads.length > item &&
this.snippets.anyWorkerAlive() &&
(this.snippets.resultCount() <= item || countFinishedRemoteSearch() <= item)) {
try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
}
} }
return this.snippets.oneResult(item); return this.results.oneResult(item);
} }
boolean secondarySearchStartet = false; boolean secondarySearchStartet = false;
@ -401,7 +394,7 @@ public final class SearchEvent {
} }
public ResultFetcher result() { public ResultFetcher result() {
return this.snippets; return this.results;
} }
} }

@ -90,20 +90,8 @@ public class SearchEventCache {
} }
} }
if (event == null) { if (event == null) {
// generate a new event // start a new event
event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts); event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
} else {
// if worker threads had been alive, but did not succeed, start them again to fetch missing links
if ((!event.result().anyWorkerAlive()) &&
(((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
(event.result().result.size() < query.neededResults() + 10)) &&
//(event.query.onlineSnippetFetch) &&
(event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
// set new timeout
event.resetEventTime();
// start worker threads to fetch urls and snippets
event.result().restartWorker();
}
} }
return event; return event;

Loading…
Cancel
Save