From ead48c4b25ce6c5c398a97814429f7aedd4ededb Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 30 Aug 2009 10:28:23 +0000 Subject: [PATCH] fix for preparation of search result pages with offset > 10: - fewer pages are fetched in advance - just-in-time fetch of next required pages - fix for missing hand-over of offset to fetch threads git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6279 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/AccessTracker_p.java | 4 +- source/de/anomic/search/QueryParams.java | 14 +++---- source/de/anomic/search/ResultFetcher.java | 42 ++++++++++++------- source/de/anomic/search/SearchEvent.java | 21 ++++------ source/de/anomic/search/SearchEventCache.java | 14 +------ 5 files changed, 44 insertions(+), 51 deletions(-) diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index 30a0fd961..923aa7d5c 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -169,12 +169,12 @@ public class AccessTracker_p { prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? 
"" : searchProfile.remotepeer.getName()); prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes)); } - prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage); + prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage); prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount); prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime); prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime); prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime); - qcountSum += searchProfile.linesPerPage; + qcountSum += searchProfile.itemsPerPage; rcountSum += searchProfile.resultcount; utimeSum += searchProfile.urlretrievaltime; stimeSum += searchProfile.snippetcomputationtime; diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index dd1f45d8b..30a00fdd1 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -64,7 +64,7 @@ public final class QueryParams { public String queryString; public TreeSet fullqueryHashes, queryHashes, excludeHashes; - public int linesPerPage, offset; + public int itemsPerPage, offset; public String prefer; public int contentdom; public String urlMask; @@ -90,7 +90,7 @@ public final class QueryParams { public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options public QueryParams(final String queryString, - final int lines, + final int itemsPerPage, final RankingProfile ranking, final Bitfield constraint) { if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) { @@ -110,7 +110,7 @@ public final class QueryParams { this.maxDistance = Integer.MAX_VALUE; this.prefer = ""; this.contentdom = CONTENTDOM_ALL; - this.linesPerPage = lines; + 
this.itemsPerPage = itemsPerPage; this.offset = 0; this.urlMask = ".*"; this.targetlang = "en"; @@ -139,7 +139,7 @@ public final class QueryParams { final String language, final String navigators, final boolean onlineSnippetFetch, - final int lines, final int offset, final String urlMask, + final int itemsPerPage, final int offset, final String urlMask, final int domType, final int domMaxTargets, final Bitfield constraint, final boolean allofconstraint, final String site, @@ -156,7 +156,7 @@ public final class QueryParams { this.maxDistance = maxDistance; this.prefer = prefer; this.contentdom = contentdom; - this.linesPerPage = Math.min((specialRights) ? 1000 : 50, lines); + this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage); this.offset = Math.min((specialRights) ? 10000 : 100, offset); this.urlMask = urlMask; assert language != null; @@ -178,12 +178,12 @@ public final class QueryParams { public int neededResults() { // the number of result lines that must be computed - return this.offset + this.linesPerPage; + return this.offset + this.itemsPerPage; } public int displayResults() { // the number of result lines that are displayed at once (size of result page) - return this.linesPerPage; + return this.itemsPerPage; } public void setOffset(final int newOffset) { diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index a1dff8b41..7e6b8bd85 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -50,7 +50,7 @@ public class ResultFetcher { // input values final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container - final QueryParams query; + QueryParams query; private final Segment indexSegment; private final yacySeedDB peers; @@ -91,23 +91,18 @@ public class ResultFetcher { } // start worker threads to fetch urls and snippets - this.workerThreads = new 
Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1]; - for (int i = 0; i < this.workerThreads.length; i++) { - this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0); - this.workerThreads[i].start(); - } + this.workerThreads = null; + deployWorker(10); serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false); } - public void restartWorker() { + public void deployWorker(int neededResults) { if (anyWorkerAlive()) return; - this.workerThreads = new Worker[workerThreadCount]; - Worker worker; - for (int i = 0; i < workerThreads.length; i++) { - worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0); - worker.start(); - workerThreads[i] = worker; + this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1]; + for (int i = 0; i < workerThreads.length; i++) { + this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 
2 : 0, neededResults); + this.workerThreads[i].start(); } } @@ -136,12 +131,14 @@ public class ResultFetcher { private long lastLifeSign; // when the last time the run()-loop was executed private final int id; private int snippetMode; + private int neededResults; - public Worker(final int id, final long maxlifetime, int snippetMode) { + public Worker(final int id, final long maxlifetime, int snippetMode, int neededResults) { this.id = id; this.snippetMode = snippetMode; this.lastLifeSign = System.currentTimeMillis(); this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); + this.neededResults = neededResults; } public void run() { @@ -152,6 +149,7 @@ public class ResultFetcher { boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0; try { while (System.currentTimeMillis() < this.timeout) { + if (result.size() >= neededResults) break; this.lastLifeSign = System.currentTimeMillis(); // check if we have enough @@ -285,10 +283,24 @@ public class ResultFetcher { return this.result.element(item).element; } + System.out.println("rankedCache.size() = " + this.rankedCache.size()); + System.out.println("result.size() = " + this.result.size()); + System.out.println("query.neededResults() = " + query.neededResults()); + + if ((!anyWorkerAlive()) && + (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) || + (this.result.size() < query.neededResults())) && + //(event.query.onlineSnippetFetch) && + (this.rankedCache.size() > this.result.size()) + ) { + // start worker threads to fetch urls and snippets + deployWorker(query.neededResults()); + } + // finally wait until enough results are there produced from the // snippet fetch process while ((anyWorkerAlive()) && (result.size() <= item)) { - try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {} + try {Thread.sleep((item % query.itemsPerPage) * 50L);} catch (final InterruptedException e) {} } // finally, if 
there is something, return the result diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index df164896a..f9def9b0c 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -66,7 +66,7 @@ public final class SearchEvent { private final Segment indexSegment; private final yacySeedDB peers; private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container - private ResultFetcher snippets; + private ResultFetcher results; // class variables for search abstracts private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation @@ -176,7 +176,7 @@ public final class SearchEvent { } // start worker threads to fetch urls and snippets - this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers); + this.results = new ResultFetcher(rankedCache, query, indexSegment, peers); // clean up events SearchEventCache.cleanupEvents(false); @@ -201,11 +201,12 @@ public final class SearchEvent { public void setQuery(QueryParams query) { this.query = query; + this.results.query = query; } public void cleanup() { // execute deletion of failed words - int rw = this.snippets.failedURLs.size(); + int rw = this.results.failedURLs.size(); if (rw > 0) { final TreeSet removeWords = query.queryHashes; removeWords.addAll(query.excludeHashes); @@ -213,7 +214,7 @@ public final class SearchEvent { final Iterator j = removeWords.iterator(); // remove the same url hashes for multiple words while (j.hasNext()) { - this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet()); + this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet()); } } catch (IOException e) { e.printStackTrace(); @@ -311,16 +312,8 @@ public final class SearchEvent { // remote search requests, wait that the local process terminates first try 
{localSearchThread.join();} catch (InterruptedException e) {} } - // now wait until as many remote worker threads have finished, as we - // want to display results - while (this.primarySearchThreads != null && - this.primarySearchThreads.length > item && - this.snippets.anyWorkerAlive() && - (this.snippets.resultCount() <= item || countFinishedRemoteSearch() <= item)) { - try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {} - } } - return this.snippets.oneResult(item); + return this.results.oneResult(item); } boolean secondarySearchStartet = false; @@ -401,7 +394,7 @@ public final class SearchEvent { } public ResultFetcher result() { - return this.snippets; + return this.results; } } diff --git a/source/de/anomic/search/SearchEventCache.java b/source/de/anomic/search/SearchEventCache.java index b22715de9..db8322f1d 100644 --- a/source/de/anomic/search/SearchEventCache.java +++ b/source/de/anomic/search/SearchEventCache.java @@ -90,20 +90,8 @@ public class SearchEventCache { } } if (event == null) { - // generate a new event + // start a new event event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts); - } else { - // if worker threads had been alive, but did not succeed, start them again to fetch missing links - if ((!event.result().anyWorkerAlive()) && - (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) || - (event.result().result.size() < query.neededResults() + 10)) && - //(event.query.onlineSnippetFetch) && - (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) { - // set new timeout - event.resetEventTime(); - // start worker threads to fetch urls and snippets - event.result().restartWorker(); - } } return event;