fix for preparation of search result pages with offset > 10:

- fewer pages are fetched in advance
- just-in-time fetch of the next required pages
- fix for missing hand-over of the offset to the fetch threads

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6279 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 16 years ago
parent 39a311d608
commit ead48c4b25
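The three bullet points above describe the paging change in this commit: a request for a page at offset o must produce o + itemsPerPage results, workers are only deployed just in time when the cached result list is still shorter than that, and the target count is handed over to every worker thread. The following is a minimal, self-contained sketch of that idea under simplified assumptions; PagingFetchSketch and its Query class are illustrative stand-ins, while the names deployWorker, neededResults, itemsPerPage and offset mirror those in the diff below. It is not the actual YaCy implementation.

// Sketch (hypothetical class, not YaCy code): just-in-time worker deployment
// keyed to neededResults = offset + itemsPerPage.
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

public class PagingFetchSketch {

    static final class Query {
        final int offset;        // index of the first result on the requested page
        final int itemsPerPage;  // page size
        Query(final int offset, final int itemsPerPage) {
            this.offset = offset;
            this.itemsPerPage = itemsPerPage;
        }
        int neededResults() {
            // number of results that must exist before the requested page can be shown
            return this.offset + this.itemsPerPage;
        }
    }

    final List<String> results = new CopyOnWriteArrayList<String>();
    Thread[] workerThreads;

    // just-in-time deployment: start workers only when the requested page
    // needs more results than are already cached
    void deployWorker(final Query query) {
        final int needed = query.neededResults();
        if (this.results.size() >= needed) return; // page is already covered
        this.workerThreads = new Thread[2];
        for (int i = 0; i < this.workerThreads.length; i++) {
            this.workerThreads[i] = new Thread(new Runnable() {
                public void run() {
                    // each worker knows the target count, so a request for offset 20
                    // keeps fetching past the first 10 results instead of stopping early;
                    // under contention this may overshoot by a result or two, which is
                    // fine: at least `needed` results end up cached
                    while (results.size() < needed) {
                        results.add("result " + results.size()); // stand-in for URL/snippet fetching
                    }
                }
            });
            this.workerThreads[i].start();
        }
    }

    public static void main(final String[] args) throws InterruptedException {
        final PagingFetchSketch fetcher = new PagingFetchSketch();
        final Query page3 = new Query(20, 10);  // third page, 10 items per page, needs 30 results
        fetcher.deployWorker(page3);
        for (final Thread t : fetcher.workerThreads) t.join();
        System.out.println(fetcher.results.size() + " results prepared for offset " + page3.offset);
    }
}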

@@ -169,12 +169,12 @@ public class AccessTracker_p {
                     prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
                     prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes));
                 }
-                prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage);
+                prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage);
                 prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
                 prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
                 prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
                 prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
-                qcountSum += searchProfile.linesPerPage;
+                qcountSum += searchProfile.itemsPerPage;
                 rcountSum += searchProfile.resultcount;
                 utimeSum += searchProfile.urlretrievaltime;
                 stimeSum += searchProfile.snippetcomputationtime;

@@ -64,7 +64,7 @@ public final class QueryParams {
     public String queryString;
     public TreeSet<byte[]> fullqueryHashes, queryHashes, excludeHashes;
-    public int linesPerPage, offset;
+    public int itemsPerPage, offset;
     public String prefer;
     public int contentdom;
     public String urlMask;
@@ -90,7 +90,7 @@ public final class QueryParams {
     public boolean specialRights; // is true if the user has a special authorization and my use more database-extensive options
     public QueryParams(final String queryString,
-                       final int lines,
+                       final int itemsPerPage,
                        final RankingProfile ranking,
                        final Bitfield constraint) {
         if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
@@ -110,7 +110,7 @@ public final class QueryParams {
         this.maxDistance = Integer.MAX_VALUE;
         this.prefer = "";
         this.contentdom = CONTENTDOM_ALL;
-        this.linesPerPage = lines;
+        this.itemsPerPage = itemsPerPage;
         this.offset = 0;
         this.urlMask = ".*";
         this.targetlang = "en";
@@ -139,7 +139,7 @@ public final class QueryParams {
                        final String language,
                        final String navigators,
                        final boolean onlineSnippetFetch,
-                       final int lines, final int offset, final String urlMask,
+                       final int itemsPerPage, final int offset, final String urlMask,
                        final int domType, final int domMaxTargets,
                        final Bitfield constraint, final boolean allofconstraint,
                        final String site,
@@ -156,7 +156,7 @@ public final class QueryParams {
         this.maxDistance = maxDistance;
         this.prefer = prefer;
         this.contentdom = contentdom;
-        this.linesPerPage = Math.min((specialRights) ? 1000 : 50, lines);
+        this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage);
         this.offset = Math.min((specialRights) ? 10000 : 100, offset);
         this.urlMask = urlMask;
         assert language != null;
@@ -178,12 +178,12 @@ public final class QueryParams {
     public int neededResults() {
         // the number of result lines that must be computed
-        return this.offset + this.linesPerPage;
+        return this.offset + this.itemsPerPage;
     }
     public int displayResults() {
         // the number of result lines that are displayed at once (size of result page)
-        return this.linesPerPage;
+        return this.itemsPerPage;
     }
     public void setOffset(final int newOffset) {

@@ -50,7 +50,7 @@ public class ResultFetcher {
     // input values
     final RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
-    final QueryParams query;
+    QueryParams query;
     private final Segment indexSegment;
     private final yacySeedDB peers;
@@ -91,23 +91,18 @@ public class ResultFetcher {
         }
         // start worker threads to fetch urls and snippets
-        this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
-        for (int i = 0; i < this.workerThreads.length; i++) {
-            this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0);
-            this.workerThreads[i].start();
-        }
+        this.workerThreads = null;
+        deployWorker(10);
         serverProfiling.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), this.workerThreads.length + " online snippet fetch threads started", 0, 0), false);
     }
-    public void restartWorker() {
+    public void deployWorker(int neededResults) {
         if (anyWorkerAlive()) return;
-        this.workerThreads = new Worker[workerThreadCount];
-        Worker worker;
-        for (int i = 0; i < workerThreads.length; i++) {
-            worker = new Worker(i, 6000, (query.onlineSnippetFetch) ? 2 : 0);
-            worker.start();
-            workerThreads[i] = worker;
+        this.workerThreads = new Worker[(query.onlineSnippetFetch) ? workerThreadCount : 1];
+        for (int i = 0; i < workerThreads.length; i++) {
+            this.workerThreads[i] = new Worker(i, 10000, (query.onlineSnippetFetch) ? 2 : 0, neededResults);
+            this.workerThreads[i].start();
         }
     }
@@ -136,12 +131,14 @@ public class ResultFetcher {
         private long lastLifeSign; // when the last time the run()-loop was executed
         private final int id;
         private int snippetMode;
+        private int neededResults;
-        public Worker(final int id, final long maxlifetime, int snippetMode) {
+        public Worker(final int id, final long maxlifetime, int snippetMode, int neededResults) {
             this.id = id;
             this.snippetMode = snippetMode;
             this.lastLifeSign = System.currentTimeMillis();
             this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
+            this.neededResults = neededResults;
         }
public void run() {
@@ -152,6 +149,7 @@ public class ResultFetcher {
             boolean nav_topics = query.navigators.equals("all") || query.navigators.indexOf("topics") >= 0;
             try {
                 while (System.currentTimeMillis() < this.timeout) {
+                    if (result.size() >= neededResults) break;
                     this.lastLifeSign = System.currentTimeMillis();
                     // check if we have enough
@@ -285,10 +283,24 @@ public class ResultFetcher {
             return this.result.element(item).element;
         }
+        System.out.println("rankedCache.size() = " + this.rankedCache.size());
+        System.out.println("result.size() = " + this.result.size());
+        System.out.println("query.neededResults() = " + query.neededResults());
+        if ((!anyWorkerAlive()) &&
+            (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (images.size() + 30 < query.neededResults())) ||
+             (this.result.size() < query.neededResults())) &&
+            //(event.query.onlineSnippetFetch) &&
+            (this.rankedCache.size() > this.result.size())
+           ) {
+            // start worker threads to fetch urls and snippets
+            deployWorker(query.neededResults());
+        }
         // finally wait until enough results are there produced from the
         // snippet fetch process
         while ((anyWorkerAlive()) && (result.size() <= item)) {
-            try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
+            try {Thread.sleep((item % query.itemsPerPage) * 50L);} catch (final InterruptedException e) {}
         }
         // finally, if there is something, return the result

@@ -66,7 +66,7 @@ public final class SearchEvent {
     private final Segment indexSegment;
     private final yacySeedDB peers;
     private RankingProcess rankedCache; // ordered search results, grows dynamically as all the query threads enrich this container
-    private ResultFetcher snippets;
+    private ResultFetcher results;
     // class variables for search abstracts
     private final IndexAbstracts rcAbstracts; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
@@ -176,7 +176,7 @@ public final class SearchEvent {
         }
         // start worker threads to fetch urls and snippets
-        this.snippets = new ResultFetcher(rankedCache, query, indexSegment, peers);
+        this.results = new ResultFetcher(rankedCache, query, indexSegment, peers);
         // clean up events
         SearchEventCache.cleanupEvents(false);
@@ -201,11 +201,12 @@ public final class SearchEvent {
     public void setQuery(QueryParams query) {
         this.query = query;
+        this.results.query = query;
     }
     public void cleanup() {
         // execute deletion of failed words
-        int rw = this.snippets.failedURLs.size();
+        int rw = this.results.failedURLs.size();
         if (rw > 0) {
             final TreeSet<byte[]> removeWords = query.queryHashes;
             removeWords.addAll(query.excludeHashes);
@@ -213,7 +214,7 @@ public final class SearchEvent {
                 final Iterator<byte[]> j = removeWords.iterator();
                 // remove the same url hashes for multiple words
                 while (j.hasNext()) {
-                    this.indexSegment.termIndex().remove(j.next(), this.snippets.failedURLs.keySet());
+                    this.indexSegment.termIndex().remove(j.next(), this.results.failedURLs.keySet());
                 }
             } catch (IOException e) {
                 e.printStackTrace();
@@ -311,16 +312,8 @@ public final class SearchEvent {
                 // remote search requests, wait that the local process terminates first
                 try {localSearchThread.join();} catch (InterruptedException e) {}
             }
-            // now wait until as many remote worker threads have finished, as we
-            // want to display results
-            while (this.primarySearchThreads != null &&
-                   this.primarySearchThreads.length > item &&
-                   this.snippets.anyWorkerAlive() &&
-                   (this.snippets.resultCount() <= item || countFinishedRemoteSearch() <= item)) {
-                try {Thread.sleep(item * 50L);} catch (final InterruptedException e) {}
-            }
         }
-        return this.snippets.oneResult(item);
+        return this.results.oneResult(item);
     }
     boolean secondarySearchStartet = false;
@@ -401,7 +394,7 @@ public final class SearchEvent {
     }
     public ResultFetcher result() {
-        return this.snippets;
+        return this.results;
     }
 }

@@ -90,20 +90,8 @@ public class SearchEventCache {
             }
         }
         if (event == null) {
-            // generate a new event
+            // start a new event
             event = new SearchEvent(query, indexSegment, peers, crawlResults, preselectedPeerHashes, generateAbstracts);
-        } else {
-            // if worker threads had been alive, but did not succeed, start them again to fetch missing links
-            if ((!event.result().anyWorkerAlive()) &&
-                (((query.contentdom == QueryParams.CONTENTDOM_IMAGE) && (event.result().images.size() + 30 < query.neededResults())) ||
-                 (event.result().result.size() < query.neededResults() + 10)) &&
-                //(event.query.onlineSnippetFetch) &&
-                (event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize() > event.result().result.size())) {
-                // set new timeout
-                event.resetEventTime();
-                // start worker threads to fetch urls and snippets
-                event.result().restartWorker();
-            }
         }
         return event;
