diff --git a/source/de/anomic/tools/DidYouMean.java b/source/de/anomic/tools/DidYouMean.java index 6a7d23ee2..0c9f52189 100644 --- a/source/de/anomic/tools/DidYouMean.java +++ b/source/de/anomic/tools/DidYouMean.java @@ -1,8 +1,10 @@ package de.anomic.tools; +import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.TreeSet; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.parser.Word; @@ -17,11 +19,11 @@ import de.anomic.plasma.parser.Word; public class DidYouMean { private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p', - 'q','r','s','t','u','v','w','x','y','z','ä','ö','ü','ß'}; + 'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'}; private final Set set; + private final plasmaSwitchboard sb; private String word; private int len; - private final plasmaSwitchboard sb; public DidYouMean(final plasmaSwitchboard env) { this.set = new HashSet(); @@ -30,21 +32,26 @@ public class DidYouMean { this.sb = env; } - public Set getSuggestion(String word) { + public Set getSuggestion(final String word) { this.word = word.toLowerCase(); this.len = word.length(); + ChangingOneLetter(); AddingOneLetter(); DeletingOneLetter(); ReversingTwoConsecutiveLetters(); - Iterator it = this.set.iterator(); + + final Iterator it = this.set.iterator(); + // final TreeSet rset = new TreeSet(new wordSizeComparator()); + final TreeSet rset = new TreeSet(); String s; - final HashSet rset = new HashSet(); - while(it.hasNext()) { + int count = 0; + while(count<10 && it.hasNext()) { s = it.next(); if(sb.indexSegment.termIndex().has(Word.word2hash(s))) { rset.add(s); - } + count++; + } } rset.remove(word.toLowerCase()); return rset; @@ -78,6 +85,16 @@ public class DidYouMean { } } + public class wordSizeComparator implements Comparator { + + public int compare(final String o1, final String o2) { + final Integer i1 = sb.indexSegment.termIndex().count(Word.word2hash(o1)); + final Integer i2 = sb.indexSegment.termIndex().count(Word.word2hash(o2)); + return i1.compareTo(i2); + } + + } + }