fixed several search bugs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7180 6c8d7289-2bf4-0310-a012-ef5d649a1542
15 years ago · 84a023cbc8
parent 937dd956d3
commit 84a023cbc8
10 changed files with 21 additions and 22 deletions
--- a/htroot/ViewImage.java
+++ b/htroot/ViewImage.java
@ -92,7 +92,7 @@ public class ViewImage {
            if (url != null) try {
                resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CrawlProfile.CacheStrategy.IFEXIST);
            } catch (IOException e) {
-                Log.logWarning("ViewImage", "cannot load: " + e.getMessage());
+                Log.logFine("ViewImage", "cannot load: " + e.getMessage());
            }
            byte[] imgb = null;
            if (resourceb == null) {
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@ -134,7 +134,7 @@ document.getElementById("Enter").value = "search again";
 	<p><strong>The following words are stop-words and had been excluded from the search: #[stopwords]#.</strong></p>
 #(/excluded)#

-<!-- type the number of results -->
+<!-- type the number of results and navigation bar -->
 #(num-results)#
 	::
 	<p>No Results.</p>
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -97,7 +97,7 @@ public class yacysearch {
        // get query
        String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
        String querystring =  originalquerystring.replace('+', ' ');
-        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
+        CrawlProfile.CacheStrategy snippetFetchStrategy = (post != null && post.get("verify", "false").equals("true")) ? CrawlProfile.CacheStrategy.IFFRESH : CrawlProfile.CacheStrategy.parse(post.get("verify", "cacheonly"));
        if (snippetFetchStrategy == null) snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
        final serverObjects prop = new serverObjects();

@ -237,7 +237,7 @@ public class yacysearch {
            Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
        } else if (Domains.matchesList(client, sb.networkWhitelist)) {
            Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
-        } else if (global || snippetFetchStrategy.isAllowedToFetchOnline()) {
+        } else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
            // in case that we do a global search or we want to fetch snippets, we check for DoS cases
            synchronized (trackerHandles) {
                int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@ -222,10 +222,6 @@ public final class QueryParams {
        return this.contentdom.toString();
    }
    
-    public boolean isGlobal() {
-        return this.domType != SEARCHDOM_LOCAL;
-    }
-    
    public boolean isLocal() {
        return this.domType == SEARCHDOM_LOCAL;
    }
@ -418,8 +414,8 @@ public final class QueryParams {
            context.append('-');
            context.append(hashSet2hashString(this.excludeHashes));
        }
-        context.append(asterisk);
-        context.append(this.domType);
+        //context.append(asterisk);
+        //context.append(this.domType);
        context.append(asterisk);
        context.append(this.contentdom);
        context.append(asterisk);
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@ -347,9 +347,9 @@ public final class RankingProcess extends Thread {
     * @param timeout the time this method may take for a result computation
     * @return a metadata entry for a url
     */
-    public URIMetadataRow takeURL(final boolean skipDoubleDom, final int timeout) {
+    public URIMetadataRow takeURL(final boolean skipDoubleDom, final long timeout) {
        // returns from the current RWI list the best URL entry and removes this entry from the list
-    	long timeLimit = System.currentTimeMillis() + timeout;
+    	long timeLimit = System.currentTimeMillis() + Math.max(10, timeout);
    	int p = -1;
    	byte[] urlhash;
    	long timeleft;
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@ -164,12 +164,12 @@ public class ResultFetcher {
                    if ((query.contentdom != ContentDomain.IMAGE) && (result.sizeAvailable() >= query.neededResults() + 10)) break;
    
                    // get next entry
-                    page = rankedCache.takeURL(true, taketimeout);
+                    page = rankedCache.takeURL(true, this.timeout - System.currentTimeMillis());
                    //if (page == null) page = rankedCache.takeURL(false, taketimeout);
                    if (page == null) break;
                    if (failedURLs.has(page.hash())) continue;
                    
-                    final ResultEntry resultEntry = fetchSnippet(page, query.host == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0
+                    final ResultEntry resultEntry = fetchSnippet(page, query.sitehash == null ? cacheStrategy : CacheStrategy.CACHEONLY); // does not fetch snippets if snippetMode == 0

                    if (resultEntry == null) continue; // the entry had some problems, cannot be used
                    //if (result.contains(resultEntry)) continue;
@ -228,7 +228,7 @@ public class ResultFetcher {
                    ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_indexof))),
                    180,
                    Integer.MAX_VALUE,
-                    query.isGlobal());
+                    !query.isLocal());
            final long snippetComputationTime = System.currentTimeMillis() - startTime;
            Log.logInfo("SEARCH", "text snippet load time for " + metadata.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
            
@ -247,7 +247,7 @@ public class ResultFetcher {
        } else {
            // attach media information
            startTime = System.currentTimeMillis();
-            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, query.isGlobal());
+            final ArrayList<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(metadata.url(), snippetFetchWordHashes, query.contentdom, cacheStrategy, 6000, !query.isLocal());
            final long snippetComputationTime = System.currentTimeMillis() - startTime;
            Log.logInfo("SEARCH", "media snippet load time for " + metadata.url() + ": " + snippetComputationTime);
            
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@ -155,9 +155,9 @@ public final class SearchEvent {
        } else {
            // do a local search
            this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 1);
-            this.rankedCache.run(); // this is not started concurrently here on purpose!
            
            if (generateAbstracts) {
+                this.rankedCache.run(); // this is not started concurrently here on purpose!
                // compute index abstracts
                final long timer = System.currentTimeMillis();
                int maxcount = -1;
@ -182,6 +182,8 @@ public final class SearchEvent {
                    IAResults.put(wordhash, ReferenceContainer.compressIndex(container, null, 1000).toString());
                }
                EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.ABSTRACTS, "", this.rankedCache.searchContainerMap().size(), System.currentTimeMillis() - timer), false);
+            } else {
+                this.rankedCache.start(); // start concurrently
            }
            
            // start worker threads to fetch urls and snippets
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@ -475,11 +475,11 @@ public final class yacyClient {
                continue; // db-error
            }

-            if (urlEntry.snippet() != null) {
+            if (urlEntry.snippet() != null && urlEntry.snippet().length() > 0 && !urlEntry.snippet().equals("null")) {
                // we don't store the snippets along the url entry,
                // because they are search-specific.
                // instead, they are placed in a snipped-search cache.
-                // System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
+                // System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
                TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
            }
            
--- a/source/net/yacy/YaCySearchClient.java
+++ b/source/net/yacy/YaCySearchClient.java
@ -76,10 +76,11 @@ public class YaCySearchClient {
    }

    public static class RSSEntry {
-        String title, link;
+        String title, link, snippet;
        public RSSEntry(Element element) {
            title = val(element, "title", "");
            link  = val(element, "link", "");
+            snippet = val(element, "description", "");
        }
        private String val(Element parent, String label, String dflt) {
            Element e = (Element) parent.getElementsByTagName(label).item(0);
@ -88,7 +89,7 @@ public class YaCySearchClient {
                    ((CharacterData) child).getData() : dflt;
        }
        public String toString() {
-            return "Title : " + title + "\nLink  : " + link + "\n";
+            return "Title      : " + title + "\nLink       : " + link + "\nDescription: " + snippet + "\n";
        }
    }
    
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@ -147,7 +147,7 @@ public class genericImageParser extends AbstractParser implements Parser {
                            props.put(tag.getTagName(), tag.getDescription());
                            ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
                        } catch (MetadataException e) {
-                            Log.logException(e);
+                            //Log.logException(e);
                        }
                    }
                    title = props.get("Image Description");