From 89169d54fd1f7c7611ba1d347fbb49ed192a3483 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 2 Feb 2008 00:16:00 +0000 Subject: [PATCH] fixed search result preparation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4427 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearch.html | 2 +- .../de/anomic/plasma/plasmaSearchEvent.java | 14 ++++++++++- .../de/anomic/plasma/plasmaSnippetCache.java | 25 +++++++++++++++---- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index 4f0ddfd22..7b6ab5fa9 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -117,7 +117,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results" #{/results}# #(resultTable)#::#(/resultTable)# - + diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 5f783d174..43931b601 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -58,7 +58,7 @@ public final class plasmaSearchEvent { public static final String URLFETCH = "urlfetch"; public static final String NORMALIZING = "normalizing"; - public static int workerThreadCount = 8; + public static int workerThreadCount = 10; public static String lastEventID = ""; private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes @@ -391,6 +391,17 @@ public final class plasmaSearchEvent { } return false; } + + private int countFinishedWorkerThreads() { + if (this.workerThreads == null) return workerThreadCount; + int c = 0; + for (int i = 0; i < workerThreadCount; i++) { + if ((this.workerThreads[i] == null) || + !(this.workerThreads[i].isAlive()) || + (this.workerThreads[i].busytime() >= 3000)) c++; + } + return c; + } private boolean anyRemoteSearchAlive() { // 
check primary search threads @@ -576,6 +587,7 @@ public final class plasmaSearchEvent { public ResultEntry oneResult(int item) { // first sleep a while to give accumulation threads a chance to work while (((localSearchThread != null) && (localSearchThread.isAlive())) || + ((countFinishedWorkerThreads() <= item) && (item < workerThreadCount)) || ((this.primarySearchThreads != null) && (this.primarySearchThreads.length > item) && (anyWorkerAlive()) && ((this.resultList.size() <= item) || (countFinishedRemoteSearch() <= item)))) { try {Thread.sleep(100);} catch (InterruptedException e) {} diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 9c1587dfd..2494a17be 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -76,6 +76,7 @@ public class plasmaSnippetCache { public static final int SOURCE_CACHE = 0; public static final int SOURCE_FILE = 1; public static final int SOURCE_WEB = 2; + public static final int SOURCE_METADATA = 3; public static final int ERROR_NO_HASH_GIVEN = 11; public static final int ERROR_SOURCE_LOADING = 12; @@ -280,11 +281,16 @@ public class plasmaSnippetCache { if ((resContentLength > maxDocLen) && (!fetchOnline)) { // content may be too large to be parsed here. To be fast, we omit calculation of snippet here return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "resource available, but too large: " + resContentLength + " bytes"); - }/* - } else if (url.) { + } + } else if (containsAllHashes(comp.dc_title(), queryhashes)) { // try to create the snippet from information given in the title metadata - */ - + return new TextSnippet(url, (comp.dc_creator().length() > 0) ? 
comp.dc_creator() : comp.dc_subject(), SOURCE_METADATA, null, null, faviconCache.get(url.hash())); + } else if (containsAllHashes(comp.dc_creator(), queryhashes)) { + // try to create the snippet from information given in the creator metadata + return new TextSnippet(url, comp.dc_creator(), SOURCE_METADATA, null, null, faviconCache.get(url.hash())); + } else if (containsAllHashes(comp.dc_subject(), queryhashes)) { + // try to create the snippet from information given in the subject metadata + return new TextSnippet(url, (comp.dc_creator().length() > 0) ? comp.dc_creator() : comp.dc_subject(), SOURCE_METADATA, null, null, faviconCache.get(url.hash())); } else if (fetchOnline) { // if not found try to download it @@ -741,7 +747,16 @@ public class plasmaSnippetCache { } return map; } - + + private static boolean containsAllHashes(String sentence, Set queryhashes) { + HashMap m = hashSentence(sentence); + Iterator i = queryhashes.iterator(); + while (i.hasNext()) { + if (!(m.containsKey(i.next()))) return false; + } + return true; + } + public static plasmaParserDocument parseDocument(yacyURL url, long contentLength, InputStream resourceStream) throws ParserException { return parseDocument(url, contentLength, resourceStream, null); }