From b5a29e96511e77db28dd8df95e9b921f4142886a Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 25 Nov 2006 00:38:09 +0000 Subject: [PATCH] - fix for snippets that are too short - added keyword to snippet fetch to suppress removal of not-found snippet words (for debugging) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3009 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/js/yacysearch.js | 2 +- htroot/xml/snippet.java | 5 ++++- source/de/anomic/plasma/plasmaSnippetCache.java | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/htroot/js/yacysearch.js b/htroot/js/yacysearch.js index 9be0f1778..418d7815b 100644 --- a/htroot/js/yacysearch.js +++ b/htroot/js/yacysearch.js @@ -13,7 +13,7 @@ function AllSnippets() { function requestSnippet(url, query){ var request=createRequestObject(); - request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&search=' + escape(query),true); + request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&search=' + escape(query) + '&remove=true',true); request.onreadystatechange = function () {handleState(request)}; request.send(null); } diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java index d2d8c5202..7ad5973c1 100644 --- a/htroot/xml/snippet.java +++ b/htroot/xml/snippet.java @@ -25,6 +25,9 @@ public class snippet { String urlString = post.get("url", ""); URL url = new URL(urlString); + // if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed + boolean remove = post.get("remove", "false").equals("true"); + String querystring = post.get("search", "").trim(); if ((querystring.length() > 2) && (querystring.charAt(0) == '"') && (querystring.charAt(querystring.length() - 1) == '"')) { querystring = querystring.substring(1, querystring.length() - 1).trim(); @@ -47,7 +50,7 @@ public class snippet { prop.put("text", (snippet.exists()) ? 
"" : "unknown"); } else { String error = snippet.getError(); - if (error.equals("no matching snippet found")) { + if ((remove) && (error.equals("no matching snippet found"))) { switchboard.removeReferences(plasmaURL.urlHash(url), query); } prop.put("text", error); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index d7caaf557..ada1fad86 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -269,7 +269,7 @@ public class plasmaSnippetCache { * COMPUTE SNIPPET * =========================================================================== */ // we have found a parseable non-empty file: use the lines - line = computeSnippet(sentences, queryhashes, 8 + 6 * queryhashes.size(), snippetMaxLength); + line = computeSnippet(sentences, queryhashes, 3 * queryhashes.size(), snippetMaxLength); //System.out.println("loaded snippet for URL " + url + ": " + line); if (line == null) return new Snippet(null, ERROR_NO_MATCH, "no matching snippet found"); if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength); @@ -378,7 +378,7 @@ public class plasmaSnippetCache { String sentence; while (sentences.hasMoreElements()) { sentence = (String) sentences.nextElement(); - //System.out.println("Sentence " + i + ": " + sentences[i]); + //System.out.println("Snippet-Sentence :" + sentence); // DEBUG if (sentence.length() > minLength) { hs = hashSentence(sentence); j = queryhashes.iterator();