changed storage of search words: keep the order in which the words appear in the query

pull/1/head
Michael Peter Christen 13 years ago
parent ed39ef2890
commit e2f8f263e8

@ -24,9 +24,9 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.TreeSet;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.RequestHeader;
@ -43,7 +43,6 @@ import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -75,7 +74,7 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en";
}
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
final Collection<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
HandleSet q = Word.words2hashesHandles(query[0]);
// tell all threads to do nothing for a specific time

@ -602,14 +602,14 @@ public class yacysearch {
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
// the query
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final Collection<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE;
// filter out stopwords
final SortedSet<String> filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords);
final SortedSet<String> filtered = SetTools.joinConstructiveByTest(query[0], Switchboard.stopwords);
if ( !filtered.isEmpty() ) {
SetTools.excludeDestructive(query[0], Switchboard.stopwords);
SetTools.excludeDestructiveByTestSmallInLarge(query[0], Switchboard.stopwords);
}
// if a minus-button was hit, remove a special reference first

@ -25,8 +25,8 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
@ -165,7 +165,7 @@ public class yacysearchitem {
prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject());
final Set<String>[] query = theQuery.queryWords();
final Collection<String>[] query = theQuery.queryWords();
final StringBuilder s = new StringBuilder(query[0].size() * 20);
for (final String t: query[0]) {
s.append('+').append(t);

@ -26,6 +26,7 @@
package net.yacy.kelondro.data.word;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
@ -156,7 +157,7 @@ public class Word {
return p;
}
public static final HandleSet words2hashesHandles(final Set<String> words) {
public static final HandleSet words2hashesHandles(final Collection<String> words) {
final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size());
for (final String word: words)
try {

@ -220,9 +220,9 @@ public final class SetTools {
return joinConstructiveByEnumeration(set1, set2);
}
private static <A> SortedSet<A> joinConstructiveByTest(final SortedSet<A> small, final SortedSet<A> large) {
public static <A> SortedSet<A> joinConstructiveByTest(final Collection<A> small, final SortedSet<A> large) {
final Iterator<A> mi = small.iterator();
final SortedSet<A> result = new TreeSet<A>(small.comparator());
final SortedSet<A> result = new TreeSet<A>(large.comparator());
A o;
while (mi.hasNext()) {
o = mi.next();
@ -464,12 +464,12 @@ public final class SetTools {
excludeDestructiveByTestLargeInSmall(set1, set2);
}
// Removes from 'small' every element that is also contained in 'large'.
// 'small' is modified in place through its iterator; 'large' is only queried with contains().
// NOTE(review): two signatures are shown because this is a diff view; the first one
// appears to be the pre-change version and the second one its replacement - confirm.
private static <A> void excludeDestructiveByTestSmallInLarge(final Set<A> small, final Set<A> large) {
public static <A> void excludeDestructiveByTestSmallInLarge(final Collection<A> small, final Set<A> large) {
final Iterator<A> mi = small.iterator();
// Iterator.remove() is used so that elements can be dropped from 'small' while iterating over it
while (mi.hasNext()) if (large.contains(mi.next())) mi.remove();
}
// Removes from 'large' every element that iterating over 'small' yields.
// 'large' is modified in place; 'small' is only iterated and stays unchanged.
// NOTE(review): two signatures are shown because this is a diff view; the first one
// appears to be the pre-change version and the second one its replacement - confirm.
private static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Set<A> small) {
public static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Collection<A> small) {
final Iterator<A> si = small.iterator();
// remove() is called for each element of 'small', whether or not 'large' contains it
while (si.hasNext()) large.remove(si.next());
}

@ -35,7 +35,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@ -56,7 +55,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed;
import net.yacy.search.index.Segment;
@ -162,7 +160,7 @@ public final class QueryParams {
}
} else {
this.queryString = queryString;
final TreeSet<String>[] cq = cleanQuery(queryString);
final Collection<String>[] cq = cleanQuery(queryString);
this.queryHashes = Word.words2hashesHandles(cq[0]);
this.excludeHashes = Word.words2hashesHandles(cq[1]);
this.fullqueryHashes = Word.words2hashesHandles(cq[2]);
@ -378,11 +376,11 @@ public final class QueryParams {
private static String seps = "'.,/&_"; static {seps += '"';}
@SuppressWarnings("unchecked")
public static TreeSet<String>[] cleanQuery(String querystring) {
public static Collection<String>[] cleanQuery(String querystring) {
// returns three collections: a query collection, an exclude collection and a full query collection
final TreeSet<String> query = new TreeSet<String>(NaturalOrder.naturalComparator);
final TreeSet<String> exclude = new TreeSet<String>(NaturalOrder.naturalComparator);
final TreeSet<String> fullquery = new TreeSet<String>(NaturalOrder.naturalComparator);
final Collection<String> query = new ArrayList<String>();
final Collection<String> exclude = new ArrayList<String>();
final Collection<String> fullquery = new ArrayList<String>();
if ((querystring != null) && (!querystring.isEmpty())) {
@ -401,22 +399,23 @@ public final class QueryParams {
final String[] queries = querystring.split(" ");
for (String quer : queries) {
if (quer.startsWith("-")) {
exclude.add(quer.substring(1));
String x = quer.substring(1);
if (!exclude.contains(x)) exclude.add(x);
} else {
while ((c = quer.indexOf('-')) >= 0) {
s = quer.substring(0, c);
l = s.length();
if (l >= Condenser.wordminsize) {query.add(s);}
if (l > 0) {fullquery.add(s);}
if (l >= Condenser.wordminsize && !query.contains(s)) {query.add(s);}
if (l > 0 && !fullquery.contains(s)) {fullquery.add(s);}
quer = quer.substring(c + 1);
}
l = quer.length();
if (l >= Condenser.wordminsize) {query.add(quer);}
if (l > 0) {fullquery.add(quer);}
if (l >= Condenser.wordminsize && !query.contains(quer)) {query.add(quer);}
if (l > 0 && !fullquery.contains(quer)) {fullquery.add(quer);}
}
}
}
return new TreeSet[]{query, exclude, fullquery};
return new Collection[]{query, exclude, fullquery};
}
public String queryString(final boolean encodeHTML) {
@ -438,7 +437,7 @@ public final class QueryParams {
}
}
// Returns the word collections of this query, as produced by cleanQuery for the
// stored query string. The result is computed by cleanQuery on every call.
// NOTE(review): two signatures are shown because this is a diff view; the first one
// appears to be the pre-change version and the second one its replacement - confirm.
public TreeSet<String>[] queryWords() {
public Collection<String>[] queryWords() {
return cleanQuery(this.queryString);
}

Loading…
Cancel
Save