From 325ba7bfb87792b7849f39701c37f197fdc1e28e Mon Sep 17 00:00:00 2001 From: lotus Date: Wed, 26 Nov 2008 16:41:38 +0000 Subject: [PATCH] only query words with length > 2; this is not complete yet. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5368 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearchitem.java | 2 +- source/de/anomic/plasma/plasmaSearchQuery.java | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 00d516489..6b49446b0 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -227,7 +227,7 @@ public class yacysearchitem { ((yacyURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") + (((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); final plasmaSnippetCache.TextSnippet snippet = result.textSnippet(); - prop.put("content_snippet", (snippet == null) ? "" : snippet.getLineMarked(theQuery.queryHashes)); + prop.put("content_snippet", (snippet == null) ? 
"" : snippet.getLineMarked(theQuery.fullqueryHashes)); serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + item, 0, 0)); return prop; diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 50c40f0b8..854eccb42 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -56,7 +56,7 @@ public final class plasmaSearchQuery { public static final kelondroBitfield catchall_constraint = new kelondroBitfield(4, "______"); public String queryString; - public TreeSet queryHashes, excludeHashes; + public TreeSet fullqueryHashes, queryHashes, excludeHashes; public int linesPerPage, offset; public String prefer; public int contentdom; @@ -92,6 +92,7 @@ public final class plasmaSearchQuery { final TreeSet[] cq = cleanQuery(queryString); this.queryHashes = indexWord.words2hashes(cq[0]); this.excludeHashes = indexWord.words2hashes(cq[1]); + this.fullqueryHashes = indexWord.words2hashes(cq[2]); } this.ranking = ranking; this.maxDistance = Integer.MAX_VALUE; @@ -129,6 +130,7 @@ public final class plasmaSearchQuery { this.queryString = queryString; this.queryHashes = queryHashes; this.excludeHashes = excludeHashes; + this.fullqueryHashes = queryHashes; //FIXME: refactor this method to get the proper hashes this.ranking = ranking; this.maxDistance = maxDistance; this.prefer = prefer; @@ -235,7 +237,7 @@ public final class plasmaSearchQuery { @SuppressWarnings("unchecked") public static TreeSet[] cleanQuery(String querystring) { - // returns two sets: a query set and a exclude set + // returns three sets: a query set, a exclude set and a full query set if ((querystring == null) || (querystring.length() == 0)) return new TreeSet[]{new TreeSet(kelondroNaturalOrder.naturalComparator), new TreeSet(kelondroNaturalOrder.naturalComparator)}; // convert Umlaute @@ -245,22 +247,27 @@ public final class 
plasmaSearchQuery { while ((c = querystring.indexOf(seps.charAt(i))) >= 0) { querystring = querystring.substring(0, c) + (((c + 1) < querystring.length()) ? (" " + querystring.substring(c + 1)) : ""); } } + String s; // the string is clean now, but we must generate a set out of it final TreeSet query = new TreeSet(kelondroNaturalOrder.naturalComparator); final TreeSet exclude = new TreeSet(kelondroNaturalOrder.naturalComparator); + final TreeSet fullquery = new TreeSet(kelondroNaturalOrder.naturalComparator); final String[] a = querystring.split(" "); for (int i = 0; i < a.length; i++) { if (a[i].startsWith("-")) { exclude.add(a[i].substring(1)); } else { while ((c = a[i].indexOf('-')) >= 0) { - query.add(a[i].substring(0, c)); + s = a[i].substring(0, c); + if(s.length() > 2) query.add(s); + fullquery.add(s); a[i] = a[i].substring(c + 1); } - if (a[i].length() > 0) query.add(a[i]); + if (a[i].length() > 2) query.add(a[i]); + fullquery.add(a[i]); } } - return new TreeSet[]{query, exclude}; + return new TreeSet[]{query, exclude, fullquery}; } public String queryString(final boolean encodeHTML) {