|
|
|
package de.anomic.tools;
|
|
|
|
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.HashSet;
|
|
|
|
import java.util.Set;
|
|
|
|
|
|
|
|
import de.anomic.kelondro.text.IndexCell;
|
|
|
|
import de.anomic.kelondro.text.referencePrototype.WordReference;
|
|
|
|
import de.anomic.kelondro.util.Log;
|
|
|
|
import de.anomic.plasma.parser.Word;
|
|
|
|
|
|
|
|
// People make mistakes when they type words.
|
|
|
|
// The most common mistakes are the four categories listed below:
|
|
|
|
// (1) Changing one letter: bat / cat;
|
|
|
|
// (2) Adding one letter: bat / boat;
|
|
|
|
// (3) Deleting one letter: frog / fog; or
|
|
|
|
// (4) Reversing two consecutive letters: two / tow.
|
|
|
|
|
|
|
|
public class DidYouMean {
|
|
|
|
|
|
|
|
private static final char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
|
|
|
|
'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'};
|
|
|
|
private static final long TIMEOUT = 2000;
|
|
|
|
|
|
|
|
private final Set<String> set;
|
|
|
|
private final IndexCell<WordReference> index;
|
|
|
|
private String word;
|
|
|
|
private int len;
|
|
|
|
|
|
|
|
private Thread ChangingOneLetter;
|
|
|
|
private Thread AddingOneLetter;
|
|
|
|
private Thread DeletingOneLetter;
|
|
|
|
private Thread ReversingTwoConsecutiveLetters;
|
|
|
|
|
|
|
|
public DidYouMean(final IndexCell<WordReference> index) {
|
|
|
|
// this.set = Collections.synchronizedSortedSet(new TreeSet<String>(new wordSizeComparator()));
|
|
|
|
this.set = Collections.synchronizedSet(new HashSet<String>());
|
|
|
|
this.word = "";
|
|
|
|
this.len = 0;
|
|
|
|
this.index = index;
|
|
|
|
|
|
|
|
this.ChangingOneLetter = new ChangingOneLetter();
|
|
|
|
this.AddingOneLetter = new AddingOneLetter();
|
|
|
|
this.DeletingOneLetter = new DeletingOneLetter();
|
|
|
|
this.ReversingTwoConsecutiveLetters = new ReversingTwoConsecutiveLetters();
|
|
|
|
}
|
|
|
|
|
|
|
|
public Set<String> getSuggestion(final String word) {
|
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
this.word = word.toLowerCase();
|
|
|
|
this.len = word.length();
|
|
|
|
|
|
|
|
this.ChangingOneLetter.start();
|
|
|
|
this.AddingOneLetter.start();
|
|
|
|
this.DeletingOneLetter.start();
|
|
|
|
this.ReversingTwoConsecutiveLetters.start();
|
|
|
|
|
|
|
|
try {
|
|
|
|
this.ChangingOneLetter.join(TIMEOUT);
|
|
|
|
this.AddingOneLetter.join(TIMEOUT);
|
|
|
|
this.DeletingOneLetter.join(TIMEOUT);
|
|
|
|
this.ReversingTwoConsecutiveLetters.join(TIMEOUT);
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
}
|
|
|
|
|
|
|
|
this.set.remove(word.toLowerCase());
|
|
|
|
Log.logInfo("DidYouMean", "found "+this.set.size()+" terms; execution time: "+(System.currentTimeMillis()-startTime)+"ms");
|
|
|
|
|
|
|
|
return this.set;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
private class ChangingOneLetter extends Thread {
|
|
|
|
public void run() {
|
|
|
|
String s;
|
|
|
|
int count = 0;
|
|
|
|
for(int i=0; i<len; i++) {
|
|
|
|
for(int j=0; j<alphabet.length; j++) {
|
|
|
|
s = word.substring(0, i) + alphabet[j] + word.substring(i+1);
|
|
|
|
if (index.has(Word.word2hash(s))) {
|
|
|
|
set.add(s);
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private class DeletingOneLetter extends Thread {
|
|
|
|
public void run() {
|
|
|
|
String s;
|
|
|
|
int count = 0;
|
|
|
|
for(int i=0; i<len;i++) {
|
|
|
|
s = word.substring(0, i) + word.substring(i+1);
|
|
|
|
if (index.has(Word.word2hash(s))) {
|
|
|
|
set.add(s);
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private class AddingOneLetter extends Thread {
|
|
|
|
public void run() {
|
|
|
|
String s;
|
|
|
|
int count = 0;
|
|
|
|
for(int i=0; i<=len;i++) {
|
|
|
|
for(int j=0; j<alphabet.length; j++) {
|
|
|
|
s = word.substring(0, i) + alphabet[j] + word.substring(i);
|
|
|
|
if (index.has(Word.word2hash(s))) {
|
|
|
|
set.add(s);
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private class ReversingTwoConsecutiveLetters extends Thread {
|
|
|
|
public void run() {
|
|
|
|
String s;
|
|
|
|
int count = 0;
|
|
|
|
for(int i=0; i<word.length()-1; i++) {
|
|
|
|
s = word.substring(0,i)+word.charAt(i+1)+word.charAt(i)+word.substring(i+2);
|
|
|
|
if (index.has(Word.word2hash(s))) {
|
|
|
|
set.add(s);
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
private class wordSizeComparator implements Comparator<String> {
|
|
|
|
public int compare(final String o1, final String o2) {
|
|
|
|
final Integer i1 = index.count(Word.word2hash(o1));
|
|
|
|
final Integer i2 = index.count(Word.word2hash(o2));
|
|
|
|
return i2.compareTo(i1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|