changed storage of search words: keep order

pull/1/head
Michael Peter Christen 13 years ago
parent ed39ef2890
commit e2f8f263e8

@ -24,9 +24,9 @@
// along with this program; if not, write to the Free Software // along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.TreeSet;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
@ -43,7 +43,6 @@ import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
import net.yacy.search.query.QueryParams; import net.yacy.search.query.QueryParams;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -75,7 +74,7 @@ public final class timeline {
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent); language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
if (language == null) language = "en"; if (language == null) language = "en";
} }
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute final Collection<String>[] query = QueryParams.cleanQuery(querystring); // converts also umlaute
HandleSet q = Word.words2hashesHandles(query[0]); HandleSet q = Word.words2hashesHandles(query[0]);
// tell all threads to do nothing for a specific time // tell all threads to do nothing for a specific time

@ -602,14 +602,14 @@ public class yacysearch {
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", ""); (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
// the query // the query
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute final Collection<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE; final int maxDistance = (querystring.indexOf('"', 0) >= 0) ? query.length - 1 : Integer.MAX_VALUE;
// filter out stopwords // filter out stopwords
final SortedSet<String> filtered = SetTools.joinConstructive(query[0], Switchboard.stopwords); final SortedSet<String> filtered = SetTools.joinConstructiveByTest(query[0], Switchboard.stopwords);
if ( !filtered.isEmpty() ) { if ( !filtered.isEmpty() ) {
SetTools.excludeDestructive(query[0], Switchboard.stopwords); SetTools.excludeDestructiveByTestSmallInLarge(query[0], Switchboard.stopwords);
} }
// if a minus-button was hit, remove a special reference first // if a minus-button was hit, remove a special reference first

@ -25,8 +25,8 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Set;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
@ -165,7 +165,7 @@ public class yacysearchitem {
prop.putHTML("content_publisher", result.publisher()); prop.putHTML("content_publisher", result.publisher());
prop.putHTML("content_creator", result.creator());// author prop.putHTML("content_creator", result.creator());// author
prop.putHTML("content_subject", result.subject()); prop.putHTML("content_subject", result.subject());
final Set<String>[] query = theQuery.queryWords(); final Collection<String>[] query = theQuery.queryWords();
final StringBuilder s = new StringBuilder(query[0].size() * 20); final StringBuilder s = new StringBuilder(query[0].size() * 20);
for (final String t: query[0]) { for (final String t: query[0]) {
s.append('+').append(t); s.append('+').append(t);

@ -26,6 +26,7 @@
package net.yacy.kelondro.data.word; package net.yacy.kelondro.data.word;
import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
@ -156,7 +157,7 @@ public class Word {
return p; return p;
} }
public static final HandleSet words2hashesHandles(final Set<String> words) { public static final HandleSet words2hashesHandles(final Collection<String> words) {
final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size()); final HandleSet hashes = new HandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, words.size());
for (final String word: words) for (final String word: words)
try { try {

@ -220,9 +220,9 @@ public final class SetTools {
return joinConstructiveByEnumeration(set1, set2); return joinConstructiveByEnumeration(set1, set2);
} }
private static <A> SortedSet<A> joinConstructiveByTest(final SortedSet<A> small, final SortedSet<A> large) { public static <A> SortedSet<A> joinConstructiveByTest(final Collection<A> small, final SortedSet<A> large) {
final Iterator<A> mi = small.iterator(); final Iterator<A> mi = small.iterator();
final SortedSet<A> result = new TreeSet<A>(small.comparator()); final SortedSet<A> result = new TreeSet<A>(large.comparator());
A o; A o;
while (mi.hasNext()) { while (mi.hasNext()) {
o = mi.next(); o = mi.next();
@ -464,12 +464,12 @@ public final class SetTools {
excludeDestructiveByTestLargeInSmall(set1, set2); excludeDestructiveByTestLargeInSmall(set1, set2);
} }
private static <A> void excludeDestructiveByTestSmallInLarge(final Set<A> small, final Set<A> large) { public static <A> void excludeDestructiveByTestSmallInLarge(final Collection<A> small, final Set<A> large) {
final Iterator<A> mi = small.iterator(); final Iterator<A> mi = small.iterator();
while (mi.hasNext()) if (large.contains(mi.next())) mi.remove(); while (mi.hasNext()) if (large.contains(mi.next())) mi.remove();
} }
private static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Set<A> small) { public static <A> void excludeDestructiveByTestLargeInSmall(final Set<A> large, final Collection<A> small) {
final Iterator<A> si = small.iterator(); final Iterator<A> si = small.iterator();
while (si.hasNext()) large.remove(si.next()); while (si.hasNext()) large.remove(si.next());
} }

@ -35,7 +35,6 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
@ -56,7 +55,6 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.SetTools; import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
@ -162,7 +160,7 @@ public final class QueryParams {
} }
} else { } else {
this.queryString = queryString; this.queryString = queryString;
final TreeSet<String>[] cq = cleanQuery(queryString); final Collection<String>[] cq = cleanQuery(queryString);
this.queryHashes = Word.words2hashesHandles(cq[0]); this.queryHashes = Word.words2hashesHandles(cq[0]);
this.excludeHashes = Word.words2hashesHandles(cq[1]); this.excludeHashes = Word.words2hashesHandles(cq[1]);
this.fullqueryHashes = Word.words2hashesHandles(cq[2]); this.fullqueryHashes = Word.words2hashesHandles(cq[2]);
@ -378,11 +376,11 @@ public final class QueryParams {
private static String seps = "'.,/&_"; static {seps += '"';} private static String seps = "'.,/&_"; static {seps += '"';}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static TreeSet<String>[] cleanQuery(String querystring) { public static Collection<String>[] cleanQuery(String querystring) {
// returns three sets: a query set, an exclude set and a full query set // returns three sets: a query set, an exclude set and a full query set
final TreeSet<String> query = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> query = new ArrayList<String>();
final TreeSet<String> exclude = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> exclude = new ArrayList<String>();
final TreeSet<String> fullquery = new TreeSet<String>(NaturalOrder.naturalComparator); final Collection<String> fullquery = new ArrayList<String>();
if ((querystring != null) && (!querystring.isEmpty())) { if ((querystring != null) && (!querystring.isEmpty())) {
@ -401,22 +399,23 @@ public final class QueryParams {
final String[] queries = querystring.split(" "); final String[] queries = querystring.split(" ");
for (String quer : queries) { for (String quer : queries) {
if (quer.startsWith("-")) { if (quer.startsWith("-")) {
exclude.add(quer.substring(1)); String x = quer.substring(1);
if (!exclude.contains(x)) exclude.add(x);
} else { } else {
while ((c = quer.indexOf('-')) >= 0) { while ((c = quer.indexOf('-')) >= 0) {
s = quer.substring(0, c); s = quer.substring(0, c);
l = s.length(); l = s.length();
if (l >= Condenser.wordminsize) {query.add(s);} if (l >= Condenser.wordminsize && !query.contains(s)) {query.add(s);}
if (l > 0) {fullquery.add(s);} if (l > 0 && !fullquery.contains(s)) {fullquery.add(s);}
quer = quer.substring(c + 1); quer = quer.substring(c + 1);
} }
l = quer.length(); l = quer.length();
if (l >= Condenser.wordminsize) {query.add(quer);} if (l >= Condenser.wordminsize && !query.contains(quer)) {query.add(quer);}
if (l > 0) {fullquery.add(quer);} if (l > 0 && !fullquery.contains(quer)) {fullquery.add(quer);}
} }
} }
} }
return new TreeSet[]{query, exclude, fullquery}; return new Collection[]{query, exclude, fullquery};
} }
public String queryString(final boolean encodeHTML) { public String queryString(final boolean encodeHTML) {
@ -438,7 +437,7 @@ public final class QueryParams {
} }
} }
public TreeSet<String>[] queryWords() { public Collection<String>[] queryWords() {
return cleanQuery(this.queryString); return cleanQuery(this.queryString);
} }

Loading…
Cancel
Save