From 84a023cbc818153ca421791dd8008ad43955576d Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 21 Sep 2010 21:48:42 +0000
Subject: [PATCH] fixed several search bugs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7180 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/ViewImage.java                                     | 2 +-
 htroot/yacysearch.html                                    | 2 +-
 htroot/yacysearch.java                                    | 4 ++--
 source/de/anomic/search/QueryParams.java                  | 8 ++------
 source/de/anomic/search/RankingProcess.java               | 4 ++--
 source/de/anomic/search/ResultFetcher.java                | 8 ++++----
 source/de/anomic/search/SearchEvent.java                  | 4 +++-
 source/de/anomic/yacy/yacyClient.java                     | 4 ++--
 source/net/yacy/YaCySearchClient.java                     | 5 +++--
 .../yacy/document/parser/images/genericImageParser.java   | 2 +-
 10 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java
index 1ff4ed4bd..330078f0c 100644
--- a/htroot/ViewImage.java
+++ b/htroot/ViewImage.java
@@ -92,7 +92,7 @@ public class ViewImage {
         if (url != null) try {
             resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST);
         } catch (IOException e) {
-            Log.logWarning("ViewImage", "cannot load: " + e.getMessage());
+            Log.logFine("ViewImage", "cannot load: " + e.getMessage());
         }
         byte[] imgb = null;
         if (resourceb == null) {
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index 532f1b922..aa8a52062 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -134,7 +134,7 @@ document.getElementById("Enter").value = "search again";

The following words are stop-words and had been excluded from the search: #[stopwords]#.

#(/excluded)# - + #(num-results)# ::

No Results.

diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index d9ae825ed..53d75bf4b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -97,7 +97,7 @@ public class yacysearch {
         // get query
         String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
         String querystring = originalquerystring.replace('+', ' ');
-        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
+        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
         if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
         final serverObjects prop = new serverObjects();
 
@@ -237,7 +237,7 @@ public class yacysearch {
             Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
         } else if (Domains.matchesList(client, sb.networkWhitelist)) {
             Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
-        } else if (global || snippetFetchStrategy.isAllowedToFetchOnline()) {
+        } else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
             // in case that we do a global search or we want to fetch snippets, we check for DoS cases
             synchronized (trackerHandles) {
                 int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
index 3a985a210..8279d1fb2 100644
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -222,10 +222,6 @@ public final class QueryParams {
         return this.contentdom.toString();
     }
 
-    public boolean isGlobal() {
-        return this.domType != SEARCHDOM_LOCAL;
-    }
-
     public boolean isLocal() {
         return this.domType == SEARCHDOM_LOCAL;
     }
@@ -418,8 +414,8 @@ public final class QueryParams {
             context.append('-');
             context.append(hashSet2hashString(this.excludeHashes));
         }
-        context.append(asterisk);
-        context.append(this.domType);
+        //context.append(asterisk);
+        //context.append(this.domType);
         context.append(asterisk);
         context.append(this.contentdom);
         context.append(asterisk);
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index fc692cd4b..97e77e144 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -347,9 +347,9 @@ public final class RankingProcess extends Thread {
      * @param timeout the time this method may take for a result computation
      * @return a metadata entry for a url
      */
-    public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
+    public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
         // returns from the current RWI list the best URL entry and removes this entry from the list
-        long timeLimit = System.currentTimeMillis() + timeout;
+        long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
         int p = -1;
         byte[] urlhash;
         long timeleft;
diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java
index 7865249e4..296a1b125 100644
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@@ -164,12 +164,12 @@ public class ResultFetcher {
                 if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
 
                 // get next entry
-                page = rankedCache.takeURL(true, taketimeout);
+                page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
                 //if (page == null) page = rankedCache.takeURL(false, taketimeout);
                 if (page == null) break;
                 if (failedURLs.has(page.hash())) continue;
 
-                final ResultEntry resultEntry = fetchSnippet(page, query.host == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
+                final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
                 if (resultEntry == null) continue; // the entry had some problems, cannot be used
                 //if (result.contains(resultEntry)) continue;
 
@@ -228,7 +228,7 @@ public class ResultFetcher {
                     ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
                     180,
                     Integer.MAX_VALUE,
-                    query.isGlobal());
+                    !query.isLocal());
             final long snippetComputationTime = System.currentTimeMillis() - startTime;
             Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
 
@@ -247,7 +247,7 @@ public class ResultFetcher {
         } else {
             // attach media information
             startTime = System.currentTimeMillis();
-            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, query.isGlobal());
+            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
             final long snippetComputationTime = System.currentTimeMillis() - startTime;
             Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);
 
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index 5fa072f50..eadf685db 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -155,9 +155,9 @@ public final class SearchEvent {
         } else {
             // do a local search
             this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
-            this.rankedCache.run(); // this is not started concurrently here on purpose!
 
             if (generateAbstracts) {
+                this.rankedCache.run(); // this is not started concurrently here on purpose!
                 // compute index abstracts
                 final long timer = System.currentTimeMillis();
                 int maxcount = -1;
@@ -182,6 +182,8 @@ public final class SearchEvent {
                     IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
                 }
                 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
+            } else {
+                this.rankedCache.start(); // start concurrently
             }
 
             // start worker threads to fetch urls and snippets
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 223333dce..495310f01 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -475,11 +475,11 @@ public final class yacyClient {
                 continue; // db-error
             }
 
-            if (urlEntry.snippet() != null) {
+            if (urlEntry.snippet() != null && urlEntry.snippet().length() > 0 && !urlEntry.snippet().equals("null")) {
                 // we don't store the snippets along the url entry,
                 // because they are search-specific.
                 // instead, they are placed in a snipped-search cache.
-                // System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
+                // System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
                 TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
             }
 
diff --git a/source/net/yacy/YaCySearchClient.java b/source/net/yacy/YaCySearchClient.java
index 958af9e38..cd88b7f55 100644
--- a/source/net/yacy/YaCySearchClient.java
+++ b/source/net/yacy/YaCySearchClient.java
@@ -76,10 +76,11 @@ public class YaCySearchClient {
     }
 
     public static class RSSEntry {
-        String title, link;
+        String title, link, snippet;
         public RSSEntry(Element element) {
             title = val(element, "title", "");
             link = val(element, "link", "");
+            snippet = val(element, "description", "");
         }
         private String val(Element parent, String label, String dflt) {
             Element e = (Element) parent.getElementsByTagName(label).item(0);
@@ -88,7 +89,7 @@ public class YaCySearchClient {
                    ((CharacterData) child).getData() : dflt;
         }
         public String toString() {
-            return "Title : " + title + "\nLink : " + link + "\n";
+            return "Title : " + title + "\nLink : " + link + "\nDescription: " + snippet + "\n";
         }
     }
 
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index 49ceff670..c8af53bb2 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -147,7 +147,7 @@ public class genericImageParser extends AbstractParser implements Parser {
                 props.put(tag.getTagName(), tag.getDescription());
                 ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
             } catch (MetadataException e) {
-                Log.logException(e);
+                //Log.logException(e);
             }
         }
         title = props.get("Image Description");
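
Note on the ResultFetcher/RankingProcess change above: takeURL() now receives the remaining time budget of the whole fetch loop (this.timeout - System.currentTimeMillis()) instead of the fixed taketimeout, and clamps it to at least 10 ms. The following is a minimal, self-contained sketch of that pattern only; the names TimeBudgetSketch, take and fetchLoop are illustrative stand-ins, not YaCy API.

import java.util.ArrayDeque;
import java.util.Queue;

public class TimeBudgetSketch {

    // absolute deadline for the whole fetch loop (analogous to ResultFetcher's this.timeout)
    private final long deadline;
    private final Queue<String> entries = new ArrayDeque<String>();

    public TimeBudgetSketch(final long maxMillis) {
        this.deadline = System.currentTimeMillis() + maxMillis;
        this.entries.add("urlhash-1");
        this.entries.add("urlhash-2");
    }

    // analogous to RankingProcess.takeURL(skipDoubleDom, timeout):
    // wait at most 'timeout' ms for the next entry, but never less than 10 ms
    public String take(final long timeout) {
        final long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
        while (System.currentTimeMillis() < timeLimit) {
            final String e = this.entries.poll();
            if (e != null) return e;
            try { Thread.sleep(1); } catch (final InterruptedException ex) { return null; }
        }
        return null; // time budget exhausted
    }

    // analogous to the ResultFetcher loop: each call gets the time that is left, not a fixed slice
    public void fetchLoop() {
        String entry;
        while ((entry = take(this.deadline - System.currentTimeMillis())) != null) {
            System.out.println("fetched " + entry);
        }
    }

    public static void main(final String[] args) {
        new TimeBudgetSketch(500L).fetchLoop();
    }
}

Clamping with Math.max keeps a caller that arrives after the deadline from computing a wait limit in the past, which would otherwise make the wait loop exit immediately and drop remaining results.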