- fixed encoding problem

- added limit to 10 suggestions

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6058 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 16 years ago
parent 54a48b4184
commit da6ce37f7b

@ -1,8 +1,10 @@
package de.anomic.tools; package de.anomic.tools;
import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Set; import java.util.Set;
import java.util.TreeSet;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.parser.Word; import de.anomic.plasma.parser.Word;
@ -17,11 +19,11 @@ import de.anomic.plasma.parser.Word;
public class DidYouMean { public class DidYouMean {
private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p', private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
'q','r','s','t','u','v','w','x','y','z','ä','ö','ü','}; 'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'};
private final Set<String> set; private final Set<String> set;
private final plasmaSwitchboard sb;
private String word; private String word;
private int len; private int len;
private final plasmaSwitchboard sb;
public DidYouMean(final plasmaSwitchboard env) { public DidYouMean(final plasmaSwitchboard env) {
this.set = new HashSet<String>(); this.set = new HashSet<String>();
@ -30,21 +32,26 @@ public class DidYouMean {
this.sb = env; this.sb = env;
} }
public Set<String> getSuggestion(String word) { public Set<String> getSuggestion(final String word) {
this.word = word.toLowerCase(); this.word = word.toLowerCase();
this.len = word.length(); this.len = word.length();
ChangingOneLetter(); ChangingOneLetter();
AddingOneLetter(); AddingOneLetter();
DeletingOneLetter(); DeletingOneLetter();
ReversingTwoConsecutiveLetters(); ReversingTwoConsecutiveLetters();
Iterator<String> it = this.set.iterator();
final Iterator<String> it = this.set.iterator();
// final TreeSet<String> rset = new TreeSet<String>(new wordSizeComparator());
final TreeSet<String> rset = new TreeSet<String>();
String s; String s;
final HashSet<String> rset = new HashSet<String>(); int count = 0;
while(it.hasNext()) { while(count<10 && it.hasNext()) {
s = it.next(); s = it.next();
if(sb.indexSegment.termIndex().has(Word.word2hash(s))) { if(sb.indexSegment.termIndex().has(Word.word2hash(s))) {
rset.add(s); rset.add(s);
} count++;
}
} }
rset.remove(word.toLowerCase()); rset.remove(word.toLowerCase());
return rset; return rset;
@ -78,6 +85,16 @@ public class DidYouMean {
} }
} }
/**
 * Orders words by the value that {@code sb.indexSegment.termIndex().count(...)}
 * returns for their word hash, smallest count first. Words with equal counts
 * are ordered by natural {@link String} order.
 *
 * The tie-break matters when this comparator backs a sorted collection such as
 * a {@code TreeSet}: a comparator that returns 0 for two different words makes
 * the collection treat them as the same element, so one of them would be
 * silently dropped and {@code remove(word)} could hit the wrong entry.
 *
 * NOTE(review): the count is presumably the number of index references for the
 * term -- confirm against the term index implementation.
 */
public class wordSizeComparator implements Comparator<String> {

    /**
     * @param o1 first word to compare
     * @param o2 second word to compare
     * @return a negative value, zero, or a positive value as {@code o1} sorts
     *         before, equal to, or after {@code o2}; zero is returned only
     *         when both words are equal strings
     */
    public int compare(final String o1, final String o2) {
        final int c1 = sb.indexSegment.termIndex().count(Word.word2hash(o1));
        final int c2 = sb.indexSegment.termIndex().count(Word.word2hash(o2));
        if (c1 != c2) {
            // explicit comparison instead of subtraction: cannot overflow
            return (c1 < c2) ? -1 : 1;
        }
        // same count: fall back to the words themselves so distinct words never compare as equal
        return o1.compareTo(o2);
    }
}
} }

Loading…
Cancel
Save