enhanced did-you-mean (a bit): can now remember previously searched words (plus small enhancements)
pull/1/head
orbiter 12 years ago
parent a725a4242f
commit 940c6849ee

@ -58,7 +58,7 @@ public class suggest {
final String ext = header.get("EXT", "");
final boolean json = ext.equals("json");
final boolean xml = ext.equals("xml");
final boolean more = post != null && post.containsKey("more");
final boolean more = sb.index.connectedRWI() || (post != null && post.containsKey("more")); // with RWIs connected the guessing is super-fast
// get query
final String originalquerystring = (post == null) ? "" : post.get("query", post.get("q", "")).trim();

@ -26,6 +26,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Map;
@ -219,7 +220,7 @@ public class WordCache {
}
}
public static void learn(Set<String> wordset) {
public static void learn(Collection<String> wordset) {
for (String s: wordset) {
learn(new StringBuilder(s));
}
@ -320,8 +321,12 @@ public class WordCache {
}
return size;
}
/**
 * Reports how many entries the commonWords collection currently holds.
 *
 * @return the value of commonWords.size()
 */
public static int sizeCommonWords() {
return commonWords.size();
}
public static void clear() {
public static void clearCommonWords() {
commonWords.clear();
}

@ -432,7 +432,7 @@ public class DidYouMean {
StringBuilder s;
try {
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.segment.getWordCountGuess(s.toString()) > 0) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.segment.getWordCountGuess(s.toString()) > 2) {
DidYouMean.this.resultSet.add(s);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {

@ -93,7 +93,7 @@ public class ResourceObserver {
SearchEventCache.cleanupEvents(true);
this.sb.trail.clear();
Switchboard.urlBlacklist.clearblacklistCache();
WordCache.clear();
WordCache.clearCommonWords();
Domains.clear();
}
}

@ -2045,7 +2045,7 @@ public final class Switchboard extends serverSwitch {
PDFont.clearResources(); // eats up megabytes, see http://markmail.org/thread/quk5odee4hbsauhu
// clear caches
WordCache.clear();
if (WordCache.sizeCommonWords() > 1000) WordCache.clearCommonWords();
Domains.clear();
// clean up image stack

@ -287,15 +287,16 @@ public class Segment {
*/
public int getWordCountGuess(String word) {
if (word == null || word.indexOf(':') >= 0 || word.indexOf(' ') >= 0 || word.indexOf('/') >= 0) return 0;
if (this.termIndex == null) {
try {
return (int) this.fulltext.getDefaultConnector().getQueryCount(CollectionSchema.text_t.getSolrFieldName() + ':' + word);
} catch (Throwable e) {
Log.logException(e);
return 0;
}
if (this.termIndex != null) {
int count = this.termIndex.count(Word.word2hash(word));
if (count > 0) return count;
}
try {
return (int) this.fulltext.getDefaultConnector().getQueryCount(CollectionSchema.text_t.getSolrFieldName() + ':' + word);
} catch (Throwable e) {
Log.logException(e);
return 0;
}
return this.termIndex.count(Word.word2hash(word));
}
public boolean exists(final String urlhash) {

@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Map;
import java.util.SortedSet;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType;
@ -92,6 +93,9 @@ public class QueryGoal {
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words);
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words, this.all_words);
WordCache.learn(this.include_strings);
WordCache.learn(this.exclude_strings);
this.include_hashes = null;
this.exclude_hashes = null;
this.all_hashes = null;

Loading…
Cancel
Save