package de.anomic.tools; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.TreeSet; import de.anomic.kelondro.text.IndexCell; import de.anomic.kelondro.text.referencePrototype.WordReference; import de.anomic.plasma.parser.Word; // People make mistakes when they type words. // The most common mistakes are the four categories listed below: // (1) Changing one letter: bat / cat; // (2) Adding one letter: bat / boat; // (3) Deleting one letter: frog / fog; or // (4) Reversing two consecutive letters: two / tow. public class DidYouMean { private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p', 'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'}; private final Set set; private final IndexCell index; private String word; private int len; public DidYouMean(final IndexCell index) { this.set = new HashSet(); this.word = ""; this.len = 0; this.index = index; } public Set getSuggestion(final String word) { this.word = word.toLowerCase(); this.len = word.length(); ChangingOneLetter(); AddingOneLetter(); DeletingOneLetter(); ReversingTwoConsecutiveLetters(); final Iterator it = this.set.iterator(); final TreeSet rset = new TreeSet(new wordSizeComparator()); String s; while(it.hasNext()) { s = it.next(); if (index.has(Word.word2hash(s))) { rset.add(s); } } rset.remove(word.toLowerCase()); return rset; } private void ChangingOneLetter() { for(int i=0; i { public int compare(final String o1, final String o2) { final Integer i1 = index.count(Word.word2hash(o1)); final Integer i2 = index.count(Word.word2hash(o2)); return i2.compareTo(i1); } } }