From 84a023cbc818153ca421791dd8008ad43955576d Mon Sep 17 00:00:00 2001
From: orbiter
Date: Tue, 21 Sep 2010 21:48:42 +0000
Subject: [PATCH] fixed several search bugs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7180 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/ViewImage.java                                     | 2 +-
 htroot/yacysearch.html                                    | 2 +-
 htroot/yacysearch.java                                    | 4 ++--
 source/de/anomic/search/QueryParams.java                  | 8 ++------
 source/de/anomic/search/RankingProcess.java               | 4 ++--
 source/de/anomic/search/ResultFetcher.java                | 8 ++++----
 source/de/anomic/search/SearchEvent.java                  | 4 +++-
 source/de/anomic/yacy/yacyClient.java                     | 4 ++--
 source/net/yacy/YaCySearchClient.java                     | 5 +++--
 .../yacy/document/parser/images/genericImageParser.java   | 2 +-
 10 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java
index 1ff4ed4bd..330078f0c 100644
--- a/htroot/ViewImage.java
+++ b/htroot/ViewImage.java
@@ -92,7 +92,7 @@ public class ViewImage {
         if (url != null) try {
             resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST);
         } catch (IOException e) {
-            Log.logWarning("ViewImage", "cannot load: " + e.getMessage());
+            Log.logFine("ViewImage", "cannot load: " + e.getMessage());
         }
         byte[] imgb = null;
         if (resourceb == null) {
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index 532f1b922..aa8a52062 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -134,7 +134,7 @@ document.getElementById("Enter").value = "search again";

The following words are stop-words and had been excluded from the search: #[stopwords]#.

#(/excluded)# - + #(num-results)# ::

No Results.

diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index d9ae825ed..53d75bf4b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -97,7 +97,7 @@ public class yacysearch {
         // get query
         String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
         String querystring = originalquerystring.replace('+', ' ');
-        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
+        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
         if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
         final serverObjects prop = new serverObjects();
 
@@ -237,7 +237,7 @@ public class yacysearch {
             Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
         } else if (Domains.matchesList(client, sb.networkWhitelist)) {
             Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
-        } else if (global || snippetFetchStrategy.isAllowedToFetchOnline()) {
+        } else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
             // in case that we do a global search or we want to fetch snippets, we check for DoS cases
             synchronized (trackerHandles) {
                 int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
index 3a985a210..8279d1fb2 100644
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -222,10 +222,6 @@ public final class QueryParams {
         return this.contentdom.toString();
     }
 
-    public boolean isGlobal() {
-        return this.domType != SEARCHDOM_LOCAL;
-    }
-
     public boolean isLocal() {
         return this.domType == SEARCHDOM_LOCAL;
     }
@@ -418,8 +414,8 @@ public final class QueryParams {
             context.append('-');
             context.append(hashSet2hashString(this.excludeHashes));
         }
-        context.append(asterisk);
-        context.append(this.domType);
+        //context.append(asterisk);
+        //context.append(this.domType);
         context.append(asterisk);
         context.append(this.contentdom);
         context.append(asterisk);
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index fc692cd4b..97e77e144 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -347,9 +347,9 @@ public final class RankingProcess extends Thread {
      * @param timeout the time this method may take for a result computation
      * @return a metadata entry for a url
      */
-    public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
+    public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
         // returns from the current RWI list the best URL entry and removes this entry from the list
-        long timeLimit = System.currentTimeMillis() + timeout;
+        long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
         int p = -1;
         byte[] urlhash;
         long timeleft;
diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java
index 7865249e4..296a1b125 100644
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@@ -164,12 +164,12 @@ public class ResultFetcher {
                 if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
 
                 // get next entry
-                page = rankedCache.takeURL(true, taketimeout);
+                page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
                 //if (page == null) page = rankedCache.takeURL(false, taketimeout);
                 if (page == null) break;
                 if (failedURLs.has(page.hash())) continue;
 
-                final ResultEntry resultEntry = fetchSnippet(page, query.host == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
+                final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
                 if (resultEntry == null) continue; // the entry had some problems, cannot be used
                 //if (result.contains(resultEntry)) continue;
 
@@ -228,7 +228,7 @@ public class ResultFetcher {
                     ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
                     180,
                     Integer.MAX_VALUE,
-                    query.isGlobal());
+                    !query.isLocal());
             final long snippetComputationTime = System.currentTimeMillis() - startTime;
             Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
 
@@ -247,7 +247,7 @@ public class ResultFetcher {
         } else {
             // attach media information
             startTime = System.currentTimeMillis();
-            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, query.isGlobal());
+            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
             final long snippetComputationTime = System.currentTimeMillis() - startTime;
             Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);
 
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index 5fa072f50..eadf685db 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -155,9 +155,9 @@ public final class SearchEvent {
         } else {
             // do a local search
             this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
-            this.rankedCache.run(); // this is not started concurrently here on purpose!
 
             if (generateAbstracts) {
+                this.rankedCache.run(); // this is not started concurrently here on purpose!
                 // compute index abstracts
                 final long timer = System.currentTimeMillis();
                 int maxcount = -1;
@@ -182,6 +182,8 @@ public final class SearchEvent {
                     IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
                 }
                 EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
+            } else {
+                this.rankedCache.start(); // start concurrently
             }
 
             // start worker threads to fetch urls and snippets
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 223333dce..495310f01 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -475,11 +475,11 @@ public final class yacyClient {
                 continue; // db-error
             }
 
-            if (urlEntry.snippet() != null) {
+            if (urlEntry.snippet() != null && urlEntry.snippet().length() > 0 && !urlEntry.snippet().equals("null")) {
                 // we don't store the snippets along the url entry,
                 // because they are search-specific.
                 // instead, they are placed in a snipped-search cache.
-                // System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
+                // System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
                 TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
             }
 
diff --git a/source/net/yacy/YaCySearchClient.java b/source/net/yacy/YaCySearchClient.java
index 958af9e38..cd88b7f55 100644
--- a/source/net/yacy/YaCySearchClient.java
+++ b/source/net/yacy/YaCySearchClient.java
@@ -76,10 +76,11 @@ public class YaCySearchClient {
     }
 
     public static class RSSEntry {
-        String title, link;
+        String title, link, snippet;
         public RSSEntry(Element element) {
             title = val(element, "title", "");
             link = val(element, "link", "");
+            snippet = val(element, "description", "");
         }
         private String val(Element parent, String label, String dflt) {
             Element e = (Element) parent.getElementsByTagName(label).item(0);
@@ -88,7 +89,7 @@ public class YaCySearchClient {
                    ((CharacterData) child).getData() : dflt;
         }
         public String toString() {
-            return "Title : " + title + "\nLink : " + link + "\n";
+            return "Title : " + title + "\nLink : " + link + "\nDescription: " + snippet + "\n";
         }
     }
 
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index 49ceff670..c8af53bb2 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -147,7 +147,7 @@ public class genericImageParser extends AbstractParser implements Parser {
                 props.put(tag.getTagName(), tag.getDescription());
                 ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
             } catch (MetadataException e) {
-                Log.logException(e);
+                //Log.logException(e);
             }
         }
         title = props.get("Image Description");
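
Note on the ResultFetcher/RankingProcess change above: takeURL() now receives the remaining time budget of the whole fetch loop (this.timeout - System.currentTimeMillis()) instead of the fixed taketimeout, and clamps it to at least 10 ms. The following is a minimal, self-contained sketch of that pattern only; the names TimeBudgetSketch, take and fetchLoop are illustrative stand-ins, not YaCy API.

import java.util.ArrayDeque;
import java.util.Queue;

public class TimeBudgetSketch {

    // absolute deadline for the whole fetch loop (analogous to ResultFetcher's this.timeout)
    private final long deadline;
    private final Queue<String> entries = new ArrayDeque<String>();

    public TimeBudgetSketch(final long maxMillis) {
        this.deadline = System.currentTimeMillis() + maxMillis;
        this.entries.add("urlhash-1");
        this.entries.add("urlhash-2");
    }

    // analogous to RankingProcess.takeURL(skipDoubleDom, timeout):
    // wait at most 'timeout' ms for the next entry, but never less than 10 ms
    public String take(final long timeout) {
        final long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
        while (System.currentTimeMillis() < timeLimit) {
            final String e = this.entries.poll();
            if (e != null) return e;
            try { Thread.sleep(1); } catch (final InterruptedException ex) { return null; }
        }
        return null; // time budget exhausted
    }

    // analogous to the ResultFetcher loop: each call gets the time that is left, not a fixed slice
    public void fetchLoop() {
        String entry;
        while ((entry = take(this.deadline - System.currentTimeMillis())) != null) {
            System.out.println("fetched " + entry);
        }
    }

    public static void main(final String[] args) {
        new TimeBudgetSketch(500L).fetchLoop();
    }
}

Clamping with Math.max keeps a caller that arrives after the deadline from computing a wait limit in the past, which would otherwise make the wait loop exit immediately and drop remaining results.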