enhancements in did-you-mean guessing

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7243 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent a59c885ee0
commit de722090b5

@@ -14,7 +14,7 @@
<script type="text/javascript" src="/yacy/ui/js/jquery.autocomplete.js"></script>
<script type="text/javascript">
$(document).ready(function() {
$('#search').autocomplete('/suggest.json', {parse: opensearch, delay: 0, selectFirst: false});
$('#search').autocomplete('/suggest.json', {parse: opensearch, delay: 0, selectFirst: false, scroll: false});
function opensearch(data) {
var parsed = [];
data = eval('({"suggest":' + data + '})');

@@ -46,6 +46,8 @@ public class suggest {
// get query
String originalquerystring = (post == null) ? "" : post.get("query", post.get("q", "")).trim();
String querystring = originalquerystring.replace('+', ' ');
int timeout = (post == null) ? 300 : post.getInt("timeout", 300);
int count = (post == null) ? 20 : post.getInt("count", 20);
// get segment
Segment indexSegment = null;
@@ -60,7 +62,7 @@ public class suggest {
}
DidYouMean didYouMean = new DidYouMean(indexSegment.termIndex(), querystring);
Iterator<String> meanIt = didYouMean.getSuggestions(300, 10).iterator();
Iterator<String> meanIt = didYouMean.getSuggestions(timeout, count).iterator();
int meanCount = 0;
String suggestion;
StringBuilder suggestions = new StringBuilder(120);

@@ -42,7 +42,7 @@
<script type="text/javascript" src="/yacy/ui/js/jquery.autocomplete.js"></script>
<script type="text/javascript">
$(document).ready(function() {
$('#search').autocomplete('/suggest.json', {parse: opensearch, delay: 0, selectFirst: false});
$('#search').autocomplete('/suggest.json', {parse: opensearch, delay: 0, selectFirst: false, scroll: false});
function opensearch(data) {
var parsed = [];
data = eval('({"suggest":' + data + '})');

@@ -546,7 +546,7 @@ public class yacysearch {
prop.put("meanCount", meanMax);
if (meanMax > 0) {
DidYouMean didYouMean = new DidYouMean(indexSegment.termIndex(), querystring);
Iterator<String> meanIt = didYouMean.getSuggestions(100, 10).iterator();
Iterator<String> meanIt = didYouMean.getSuggestions(100, 5).iterator();
int meanCount = 0;
String suggestion;
while(meanCount<meanMax && meanIt.hasNext()) {

@@ -11,6 +11,7 @@ import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.ScoreCluster;
/**
@@ -121,15 +122,28 @@ public class DidYouMean {
long timelimit = System.currentTimeMillis() + timeout;
SortedSet<String> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) return preSorted;
SortedSet<String> countSorted = Collections.synchronizedSortedSet(new TreeSet<String>(new indexSizeComparator()));
int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this
int c0;
ScoreCluster<String> scored = new ScoreCluster<String>();
for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (preSortSelection <= 0) break;
c0 = index.count(Word.word2hash(s));
if (c0 > wc) countSorted.add(s);
preSortSelection--;
if (scored.size() >= 2 * preSortSelection) break;
scored.addScore(s, index.count(Word.word2hash(s)));
}
SortedSet<String> countSorted = Collections.synchronizedSortedSet(new TreeSet<String>(new indexSizeComparator()));
if (System.currentTimeMillis() > timelimit) {
while (scored.size() > 0) {
if (countSorted.size() >= preSortSelection) break;
String s = scored.getMaxObject();
scored.deleteScore(s);
countSorted.add(s);
}
} else {
int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this
while (scored.size() > 0) {
if (countSorted.size() >= preSortSelection) break;
String s = scored.getMaxObject();
int score = scored.deleteScore(s);
if (score > wc) countSorted.add(s);
}
}
return countSorted;
}
@@ -258,9 +272,11 @@ public class DidYouMean {
@Override
public void run() {
char m;
for (int i = 0; i < wordLen; i++) try {
m = word.charAt(i);
for (char c: alphabet) {
test(word.substring(0, i) + c + word.substring(i + 1));
if (m != c) test(word.substring(0, i) + c + word.substring(i + 1));
if (System.currentTimeMillis() > timeLimit) return;
}
} catch (InterruptedException e) {}

@@ -115,12 +115,12 @@ public class DidYouMeanLibrary {
String string = s.trim().toLowerCase();
SortedSet<String> t = this.dict.tailSet(string);
for (final String r: t) {
if (r.startsWith(string)) ret.add(r); else break;
if (r.startsWith(string) && r.length() > string.length()) ret.add(r); else break;
}
string = reverse(string);
t = this.tcid.tailSet(string);
for (final String r: t) {
if (r.startsWith(string)) ret.add(reverse(r)); else break;
if (r.startsWith(string) && r.length() > string.length()) ret.add(reverse(r)); else break;
}
return ret;
}

@@ -153,8 +153,10 @@ public class GeonamesLocalization implements Localization {
public Set<String> recommend(String s) {
Set<String> a = new HashSet<String>();
s = s.trim().toLowerCase();
if (s.length() == 0) return a;
SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
for (String r: t.keySet()) {
r = r.toLowerCase();
if (r.startsWith(s)) a.add(r); else break;
}
return a;

@@ -214,8 +214,10 @@ public class OpenGeoDBLocalization implements Localization {
public Set<String> recommend(String s) {
Set<String> a = new HashSet<String>();
s = s.trim().toLowerCase();
if (s.length() == 0) return a;
SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
for (String r: t.keySet()) {
r = r.toLowerCase();
if (r.startsWith(s)) a.add(r); else break;
}
return a;

@@ -80,6 +80,7 @@ public class OverarchingLocalization implements Localization {
*/
public Set<String> recommend(String s) {
Set<String> recommendations = new HashSet<String>();
if (s.length() == 0) return recommendations;
for (Localization service: this.services.values()) {
recommendations.addAll(service.recommend(s));
}

Loading…
Cancel
Save