changed storage of search words: keep order

pull/1/head
Michael Peter Christen 13 years ago
parent ed39ef2890
commit e2f8f263e8

@ -9,7 +9,7 @@
// $LastChangedBy: orbiter $ // $LastChangedBy: orbiter $
// //
// LICENSE // LICENSE
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or // the Free Software Foundation; either version 2 of the License, or
@ -24,9 +24,9 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.TreeSet;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
@ -43,7 +43,6 @@ import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams; import net.yacy.search.query.QueryParams;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -52,18 +51,18 @@ public final class timeline {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements // return variable that accumulates replacements
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
if ((post == null) || (env == null)) return prop; if ((post == null) || (env == null)) return prop;
final boolean authenticated = sb.adminAuthenticated(header) >= 2; final boolean authenticated = sb.adminAuthenticated(header) >= 2;
Segment segment = null; Segment segment = null;
if (post.containsKey("segment") && authenticated) { if (post.containsKey("segment") && authenticated) {
segment = sb.indexSegments.segment(post.get("segment")); segment = sb.indexSegments.segment(post.get("segment"));
} else { } else {
segment = sb.indexSegments.segment(Segments.Process.PUBLIC); segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
} }
final String querystring = post.get("query", ""); // a string of word hashes that shall be searched and combined final String querystring = post.get("query", ""); // a string of word hashes that shall be searched and combined
final int count = Math.min((authenticated) ? 1000 : 10, post.getInt("maximumRecords", 1000)); // SRU syntax final int count = Math.min((authenticated) ? 1000 : 10, post.getInt("maximumRecords", 1000)); // SRU syntax
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE); final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
@ -75,22 +74,22 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent); language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en"; if (language == null) language = "en";
} }
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute final Collection<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
HandleSet q = Word.words2hashesHandles(query[0]); HandleSet q = Word.words2hashesHandles(query[0]);
// tell all threads to do nothing for a specific time // tell all threads to do nothing for a specific time
sb.intermissionAllThreads(3000); sb.intermissionAllThreads(3000);
// prepare search // prepare search
final long timestamp = System.currentTimeMillis(); final long timestamp = System.currentTimeMillis();
// prepare an abstract result // prepare an abstract result
int indexabstractContainercount = 0; int indexabstractContainercount = 0;
int joincount = 0; int joincount = 0;
// retrieve index containers // retrieve index containers
//yacyCore.log.logInfo("INIT TIMELINE SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(query[0]) + " - " + count + " links"); //yacyCore.log.logInfo("INIT TIMELINE SEARCH: " + plasmaSearchQuery.anonymizedQueryHashes(query[0]) + " - " + count + " links");
// get the index container with the result vector // get the index container with the result vector
TermSearch<WordReference> search = null; TermSearch<WordReference> search = null;
try { try {
@ -99,7 +98,7 @@ public final class timeline {
Log.logException(e); Log.logException(e);
} }
ReferenceContainer<WordReference> index = search.joined(); ReferenceContainer<WordReference> index = search.joined();
Iterator<WordReference> i = index.entries(); Iterator<WordReference> i = index.entries();
WordReference entry; WordReference entry;
int c = 0; int c = 0;
@ -117,14 +116,14 @@ public final class timeline {
c++; c++;
} }
prop.put("event", c); prop.put("event", c);
// log // log
Network.log.logInfo("EXIT TIMELINE SEARCH: " + Network.log.logInfo("EXIT TIMELINE SEARCH: " +
QueryParams.anonymizedQueryHashes(q) + " - " + joincount + " links found, " + QueryParams.anonymizedQueryHashes(q) + " - " + joincount + " links found, " +
prop.get("linkcount", "?") + " links selected, " + prop.get("linkcount", "?") + " links selected, " +
indexabstractContainercount + " index abstracts, " + indexabstractContainercount + " index abstracts, " +
(System.currentTimeMillis() - timestamp) + " milliseconds"); (System.currentTimeMillis() - timestamp) + " milliseconds");
return prop; return prop;
} }

@ -602,14 +602,14 @@ public class yacysearch {
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", ""); (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
// the query // the query
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute final Collection<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE; final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE;
// filter out stopwords // filter out stopwords
final SortedSet<String> filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords); final SortedSet<String> filtered = SetTools.joinConstructiveByTest(query[0], Switchboard.stopwords);
if ( !filtered.isEmpty() ) { if ( !filtered.isEmpty() ) {
SetTools.excludeDestructive(query[0], Switchboard.stopwords); SetTools.excludeDestructiveByTestSmallInLarge(query[0], Switchboard.stopwords);
} }
// if a minus-button was hit, remove a special reference first // if a minus-button was hit, remove a special reference first

@ -25,8 +25,8 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Set;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
@ -165,7 +165,7 @@ public class yacysearchitem {
prop.putHTML("content_publisher", result.publisher()); prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject()); prop.putHTML("content_subject", result.subject());
final Set<String>[] query = theQuery.queryWords(); final Collection<String>[] query = theQuery.queryWords();
final StringBuilder s = new StringBuilder(query[0].size() * 20); final StringBuilder s = new StringBuilder(query[0].size() * 20);
for (final String t: query[0]) { for (final String t: query[0]) {
s.append('+').append(t); s.append('+').append(t);

@ -26,6 +26,7 @@
package net.yacy.kelondro.data.word; package net.yacy.kelondro.data.word;
import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
@ -118,11 +119,11 @@ public class Word {
private final static byte lowByte = Base64Order.alpha_enhanced[0]; private final static byte lowByte = Base64Order.alpha_enhanced[0];
private final static byte highByte = Base64Order.alpha_enhanced[Base64Order.alpha_enhanced.length - 1]; private final static byte highByte = Base64Order.alpha_enhanced[Base64Order.alpha_enhanced.length - 1];
public static boolean isPrivate(byte[] hash) { public static boolean isPrivate(byte[] hash) {
return hash[0] == highByte && hash[1] == highByte && hash[2] == highByte && hash[3] == highByte && hash[4] == highByte; return hash[0] == highByte && hash[1] == highByte && hash[2] == highByte && hash[3] == highByte && hash[4] == highByte;
} }
// create a word hash // create a word hash
public static final byte[] word2hash(final String word) { public static final byte[] word2hash(final String word) {
final String wordlc = word.toLowerCase(Locale.ENGLISH); final String wordlc = word.toLowerCase(Locale.ENGLISH);
@ -148,7 +149,7 @@ public class Word {
public final static byte PRIVATE_TYPE_COPY = 'C'; // used for a private local copy of the index public final static byte PRIVATE_TYPE_COPY = 'C'; // used for a private local copy of the index
public final static byte PRIVATE_TYPE_PHONETIC = 'K'; // used for ColognePhonetics public final static byte PRIVATE_TYPE_PHONETIC = 'K'; // used for ColognePhonetics
public static final byte[] hash2private(final byte[] hash, byte privateType) { public static final byte[] hash2private(final byte[] hash, byte privateType) {
byte[] p = new byte[commonHashLength]; byte[] p = new byte[commonHashLength];
p[0] = highByte; p[1] = highByte; p[2] = highByte; ; p[3] = highByte; ; p[4] = highByte; p[5] = privateType; p[0] = highByte; p[1] = highByte; p[2] = highByte; ; p[3] = highByte; ; p[4] = highByte; p[5] = privateType;
@ -156,7 +157,7 @@ public class Word {
return p; return p;
} }
public static final HandleSet words2hashesHandles(final Set<String> words) { public static final HandleSet words2hashesHandles(final Collection<String> words) {
final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size()); final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size());
for (final String word: words) for (final String word: words)
try { try {

@ -49,12 +49,12 @@ import net.yacy.kelondro.logging.Log;
public final class SetTools { public final class SetTools {
//public static Comparator fastStringComparator = fastStringComparator(true); //public static Comparator fastStringComparator = fastStringComparator(true);
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// helper methods // helper methods
public static int log2a(int x) { public static int log2a(int x) {
// this computes 1 + log2 // this computes 1 + log2
// it is the number of bits in x, not the logarithm by 2 // it is the number of bits in x, not the logarithm by 2
@ -72,10 +72,10 @@ public final class SetTools {
// - join by pairwise enumeration // - join by pairwise enumeration
// - join by iterative tests (where we distinguish left-right and right-left tests) // - join by iterative tests (where we distinguish left-right and right-left tests)
public static <A, B> SortedMap<A, B> joinConstructive(final Collection<SortedMap<A, B>> maps, final boolean concatStrings) { public static <A, B> SortedMap<A, B> joinConstructive(final Collection<SortedMap<A, B>> maps, final boolean concatStrings) {
// this joins all TreeMap(s) contained in maps // this joins all TreeMap(s) contained in maps
// first order entities by their size // first order entities by their size
final SortedMap<Long, SortedMap<A, B>> orderMap = new TreeMap<Long, SortedMap<A, B>>(); final SortedMap<Long, SortedMap<A, B>> orderMap = new TreeMap<Long, SortedMap<A, B>>();
SortedMap<A, B> singleMap; SortedMap<A, B> singleMap;
@ -84,18 +84,18 @@ public final class SetTools {
while (i.hasNext()) { while (i.hasNext()) {
// get next entity: // get next entity:
singleMap = i.next(); singleMap = i.next();
// check result // check result
if ((singleMap == null) || (singleMap.isEmpty())) return new TreeMap<A, B>(); if ((singleMap == null) || (singleMap.isEmpty())) return new TreeMap<A, B>();
// store result in order of result size // store result in order of result size
orderMap.put(Long.valueOf(singleMap.size() * 1000 + count), singleMap); orderMap.put(Long.valueOf(singleMap.size() * 1000 + count), singleMap);
count++; count++;
} }
// check if there is any result // check if there is any result
if (orderMap.isEmpty()) return new TreeMap<A, B>(); if (orderMap.isEmpty()) return new TreeMap<A, B>();
// we now must pairwise build up a conjunction of these maps // we now must pairwise build up a conjunction of these maps
Long k = orderMap.firstKey(); // the smallest, which means, the one with the least entries Long k = orderMap.firstKey(); // the smallest, which means, the one with the least entries
SortedMap<A, B> mapA, mapB, joinResult = orderMap.remove(k); SortedMap<A, B> mapA, mapB, joinResult = orderMap.remove(k);
@ -114,7 +114,7 @@ public final class SetTools {
if (joinResult.isEmpty()) return new TreeMap<A, B>(); if (joinResult.isEmpty()) return new TreeMap<A, B>();
return joinResult; return joinResult;
} }
public static <A, B> SortedMap<A, B> joinConstructive(final SortedMap<A, B> map1, final SortedMap<A, B> map2, final boolean concatStrings) { public static <A, B> SortedMap<A, B> joinConstructive(final SortedMap<A, B> map1, final SortedMap<A, B> map2, final boolean concatStrings) {
// comparators must be equal // comparators must be equal
if ((map1 == null) || (map2 == null)) return null; if ((map1 == null) || (map2 == null)) return null;
@ -134,7 +134,7 @@ public final class SetTools {
} }
return joinConstructiveByEnumeration(map1, map2, concatStrings); return joinConstructiveByEnumeration(map1, map2, concatStrings);
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static <A, B> SortedMap<A, B> joinConstructiveByTest(final SortedMap<A, B> small, final SortedMap<A, B> large, final boolean concatStrings) { private static <A, B> SortedMap<A, B> joinConstructiveByTest(final SortedMap<A, B> small, final SortedMap<A, B> large, final boolean concatStrings) {
final SortedMap<A, B> result = new TreeMap<A, B>(large.comparator()); final SortedMap<A, B> result = new TreeMap<A, B>(large.comparator());
@ -198,7 +198,7 @@ public final class SetTools {
} }
return result; return result;
} }
// now the same for set-set // now the same for set-set
public static <A> SortedSet<A> joinConstructive(final SortedSet<A> set1, final SortedSet<A> set2) { public static <A> SortedSet<A> joinConstructive(final SortedSet<A> set1, final SortedSet<A> set2) {
// comparators must be equal // comparators must be equal
@ -220,9 +220,9 @@ public final class SetTools {
return joinConstructiveByEnumeration(set1, set2); return joinConstructiveByEnumeration(set1, set2);
} }
private static <A> SortedSet<A> joinConstructiveByTest(final SortedSet<A> small, final SortedSet<A> large) { public static <A> SortedSet<A> joinConstructiveByTest(final Collection<A> small, final SortedSet<A> large) {
final Iterator<A> mi = small.iterator(); final Iterator<A> mi = small.iterator();
final SortedSet<A> result = new TreeSet<A>(small.comparator()); final SortedSet<A> result = new TreeSet<A>(large.comparator());
A o; A o;
while (mi.hasNext()) { while (mi.hasNext()) {
o = mi.next(); o = mi.next();
@ -256,7 +256,7 @@ public final class SetTools {
} }
return result; return result;
} }
/** /**
* test if one set is totally included in another set * test if one set is totally included in another set
* @param <A> * @param <A>
@ -269,8 +269,8 @@ public final class SetTools {
if (!large.contains(o)) return false; if (!large.contains(o)) return false;
} }
return true; return true;
} }
/** /**
* test if one set is totally included in another set * test if one set is totally included in another set
* @param small * @param small
@ -282,8 +282,8 @@ public final class SetTools {
if (!large.has(handle)) return false; if (!large.has(handle)) return false;
} }
return true; return true;
} }
/** /**
* test if the intersection of two sets is not empty * test if the intersection of two sets is not empty
* @param <A> * @param <A>
@ -379,7 +379,7 @@ public final class SetTools {
} }
return false; return false;
} }
private static boolean anymatchByEnumeration(final HandleSet set1, final HandleSet set2) { private static boolean anymatchByEnumeration(final HandleSet set1, final HandleSet set2) {
// implement pairwise enumeration // implement pairwise enumeration
final Comparator<byte[]> comp = set1.comparator(); final Comparator<byte[]> comp = set1.comparator();
@ -402,7 +402,7 @@ public final class SetTools {
} }
return false; return false;
} }
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// exclude // exclude
@ -416,7 +416,7 @@ public final class SetTools {
return excludeConstructiveByTestMapInSet(map, set); return excludeConstructiveByTestMapInSet(map, set);
// return excludeConstructiveByEnumeration(map, set); // return excludeConstructiveByEnumeration(map, set);
} }
private static <A, B> TreeMap<A, B> excludeConstructiveByTestMapInSet(final TreeMap<A, B> map, final Set<A> set) { private static <A, B> TreeMap<A, B> excludeConstructiveByTestMapInSet(final TreeMap<A, B> map, final Set<A> set) {
final TreeMap<A, B> result = new TreeMap<A, B>(map.comparator()); final TreeMap<A, B> result = new TreeMap<A, B>(map.comparator());
A o; A o;
@ -427,7 +427,7 @@ public final class SetTools {
return result; return result;
} }
*/ */
public static <A, B> void excludeDestructive(final Map<A, B> map, final Set<A> set) { public static <A, B> void excludeDestructive(final Map<A, B> map, final Set<A> set) {
// comparators must be equal // comparators must be equal
if (map == null) return; if (map == null) return;
@ -440,40 +440,40 @@ public final class SetTools {
else else
excludeDestructiveByTestSetInMap(map, set); excludeDestructiveByTestSetInMap(map, set);
} }
private static <A, B> void excludeDestructiveByTestMapInSet(final Map<A, B> map, final Set<A> set) { private static <A, B> void excludeDestructiveByTestMapInSet(final Map<A, B> map, final Set<A> set) {
final Iterator<A> mi = map.keySet().iterator(); final Iterator<A> mi = map.keySet().iterator();
while (mi.hasNext()) if (set.contains(mi.next())) mi.remove(); while (mi.hasNext()) if (set.contains(mi.next())) mi.remove();
} }
private static <A, B> void excludeDestructiveByTestSetInMap(final Map<A, B> map, final Set<A> set) { private static <A, B> void excludeDestructiveByTestSetInMap(final Map<A, B> map, final Set<A> set) {
final Iterator<A> si = set.iterator(); final Iterator<A> si = set.iterator();
while (si.hasNext()) map.remove(si.next()); while (si.hasNext()) map.remove(si.next());
} }
// and the same again with set-set // and the same again with set-set
public static <A> void excludeDestructive(final Set<A> set1, final Set<A> set2) { public static <A> void excludeDestructive(final Set<A> set1, final Set<A> set2) {
if (set1 == null) return; if (set1 == null) return;
if (set2 == null) return; if (set2 == null) return;
assert !(set1 instanceof SortedSet<?> && set2 instanceof SortedSet<?>) || ((SortedSet<A>) set1).comparator() == ((SortedSet<A>) set2).comparator(); assert !(set1 instanceof SortedSet<?> && set2 instanceof SortedSet<?>) || ((SortedSet<A>) set1).comparator() == ((SortedSet<A>) set2).comparator();
if (set1.isEmpty() || set2.isEmpty()) return; if (set1.isEmpty() || set2.isEmpty()) return;
if (set1.size() < set2.size()) if (set1.size() < set2.size())
excludeDestructiveByTestSmallInLarge(set1, set2); excludeDestructiveByTestSmallInLarge(set1, set2);
else else
excludeDestructiveByTestLargeInSmall(set1, set2); excludeDestructiveByTestLargeInSmall(set1, set2);
} }
private static <A> void excludeDestructiveByTestSmallInLarge(final Set<A> small, final Set<A> large) { public static <A> void excludeDestructiveByTestSmallInLarge(final Collection<A> small, final Set<A> large) {
final Iterator<A> mi = small.iterator(); final Iterator<A> mi = small.iterator();
while (mi.hasNext()) if (large.contains(mi.next())) mi.remove(); while (mi.hasNext()) if (large.contains(mi.next())) mi.remove();
} }
private static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Set<A> small) { public static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Collection<A> small) {
final Iterator<A> si = small.iterator(); final Iterator<A> si = small.iterator();
while (si.hasNext()) large.remove(si.next()); while (si.hasNext()) large.remove(si.next());
} }
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
public static SortedMap<String, String> loadMap(final String filename, final String sep) { public static SortedMap<String, String> loadMap(final String filename, final String sep) {
@ -488,13 +488,13 @@ public final class SetTools {
if ((line.length() > 0 && line.charAt(0) != '#') && ((pos = line.indexOf(sep)) > 0)) if ((line.length() > 0 && line.charAt(0) != '#') && ((pos = line.indexOf(sep)) > 0))
map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim()); map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
} }
} catch (final IOException e) { } catch (final IOException e) {
} finally { } finally {
if (br != null) try { br.close(); } catch (final Exception e) {} if (br != null) try { br.close(); } catch (final Exception e) {}
} }
return map; return map;
} }
public static SortedMap<String, List<String>> loadMapMultiValsPerKey(final String filename, final String sep) { public static SortedMap<String, List<String>> loadMapMultiValsPerKey(final String filename, final String sep) {
final SortedMap<String, List<String>> map = new TreeMap<String, List<String>>(); final SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
BufferedReader br = null; BufferedReader br = null;
@ -511,17 +511,17 @@ public final class SetTools {
map.get(key).add(value); map.get(key).add(value);
} }
} }
} catch (final IOException e) { } catch (final IOException e) {
} finally { } finally {
if (br != null) try { br.close(); } catch (final Exception e) {} if (br != null) try { br.close(); } catch (final Exception e) {}
} }
return map; return map;
} }
public static SortedSet<String> loadList(final File file, final Comparator<String> c) { public static SortedSet<String> loadList(final File file, final Comparator<String> c) {
final SortedSet<String> list = new TreeSet<String>(c); final SortedSet<String> list = new TreeSet<String>(c);
if (!(file.exists())) return list; if (!(file.exists())) return list;
BufferedReader br = null; BufferedReader br = null;
try { try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
@ -531,7 +531,7 @@ public final class SetTools {
if (line.length() > 0 && line.charAt(0) != '#') list.add(line.trim().toLowerCase()); if (line.length() > 0 && line.charAt(0) != '#') list.add(line.trim().toLowerCase());
} }
br.close(); br.close();
} catch (final IOException e) { } catch (final IOException e) {
} finally { } finally {
if (br != null) try{br.close();}catch(final Exception e){} if (br != null) try{br.close();}catch(final Exception e){}
} }
@ -547,7 +547,7 @@ public final class SetTools {
} }
return sb.toString(); return sb.toString();
} }
public static String setToString(final Set<String> set, final char separator) { public static String setToString(final Set<String> set, final char separator) {
final Iterator<String> i = set.iterator(); final Iterator<String> i = set.iterator();
final StringBuilder sb = new StringBuilder(set.size() * 7); final StringBuilder sb = new StringBuilder(set.size() * 7);
@ -560,7 +560,7 @@ public final class SetTools {
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
public static void main(final String[] args) { public static void main(final String[] args) {
final SortedMap<String, String> m = new TreeMap<String, String>(); final SortedMap<String, String> m = new TreeMap<String, String>();
final SortedMap<String, String> s = new TreeMap<String, String>(); final SortedMap<String, String> s = new TreeMap<String, String>();

@ -35,7 +35,6 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
@ -56,7 +55,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.SetTools; import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
@ -162,7 +160,7 @@ public final class QueryParams {
} }
} else { } else {
this.queryString = queryString; this.queryString = queryString;
final TreeSet<String>[] cq = cleanQuery(queryString); final Collection<String>[] cq = cleanQuery(queryString);
this.queryHashes = Word.words2hashesHandles(cq[0]); this.queryHashes = Word.words2hashesHandles(cq[0]);
this.excludeHashes = Word.words2hashesHandles(cq[1]); this.excludeHashes = Word.words2hashesHandles(cq[1]);
this.fullqueryHashes = Word.words2hashesHandles(cq[2]); this.fullqueryHashes = Word.words2hashesHandles(cq[2]);
@ -378,11 +376,11 @@ public final class QueryParams {
private static String seps = "'.,/&_"; static {seps += '"';} private static String seps = "'.,/&_"; static {seps += '"';}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static TreeSet<String>[] cleanQuery(String querystring) { public static Collection<String>[] cleanQuery(String querystring) {
// returns three sets: a query set, a exclude set and a full query set // returns three sets: a query set, a exclude set and a full query set
final TreeSet<String> query = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> query = new ArrayList<String>();
final TreeSet<String> exclude = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> exclude = new ArrayList<String>();
final TreeSet<String> fullquery = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> fullquery = new ArrayList<String>();
if ((querystring != null) && (!querystring.isEmpty())) { if ((querystring != null) && (!querystring.isEmpty())) {
@ -401,22 +399,23 @@ public final class QueryParams {
final String[] queries = querystring.split(" "); final String[] queries = querystring.split(" ");
for (String quer : queries) { for (String quer : queries) {
if (quer.startsWith("-")) { if (quer.startsWith("-")) {
exclude.add(quer.substring(1)); String x = quer.substring(1);
if (!exclude.contains(x)) exclude.add(x);
} else { } else {
while ((c = quer.indexOf('-')) >= 0) { while ((c = quer.indexOf('-')) >= 0) {
s = quer.substring(0, c); s = quer.substring(0, c);
l = s.length(); l = s.length();
if (l >= Condenser.wordminsize) {query.add(s);} if (l >= Condenser.wordminsize && !query.contains(s)) {query.add(s);}
if (l > 0) {fullquery.add(s);} if (l > 0 && !fullquery.contains(s)) {fullquery.add(s);}
quer = quer.substring(c + 1); quer = quer.substring(c + 1);
} }
l = quer.length(); l = quer.length();
if (l >= Condenser.wordminsize) {query.add(quer);} if (l >= Condenser.wordminsize && !query.contains(quer)) {query.add(quer);}
if (l > 0) {fullquery.add(quer);} if (l > 0 && !fullquery.contains(quer)) {fullquery.add(quer);}
} }
} }
} }
return new TreeSet[]{query, exclude, fullquery}; return new Collection[]{query, exclude, fullquery};
} }
public String queryString(final boolean encodeHTML) { public String queryString(final boolean encodeHTML) {
@ -438,7 +437,7 @@ public final class QueryParams {
} }
} }
public TreeSet<String>[] queryWords() { public Collection<String>[] queryWords() {
return cleanQuery(this.queryString); return cleanQuery(this.queryString);
} }

Loading…
Cancel
Save