- without I/O-intensive sorting by count
- but with multiple threads

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6066 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 16 years ago
parent f348190566
commit 6cde7ebf16

@ -1,13 +1,12 @@
package de.anomic.tools; package de.anomic.tools;
import java.util.Comparator; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.Set; import java.util.Set;
import java.util.TreeSet;
import de.anomic.kelondro.text.IndexCell; import de.anomic.kelondro.text.IndexCell;
import de.anomic.kelondro.text.referencePrototype.WordReference; import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.util.Log;
import de.anomic.plasma.parser.Word; import de.anomic.plasma.parser.Word;
// People make mistakes when they type words. // People make mistakes when they type words.
@ -19,79 +18,127 @@ import de.anomic.plasma.parser.Word;
public class DidYouMean { public class DidYouMean {
private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p', private static final char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'}; 'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'};
private static final long TIMEOUT = 2000;
private final Set<String> set; private final Set<String> set;
private final IndexCell<WordReference> index; private final IndexCell<WordReference> index;
private String word; private String word;
private int len; private int len;
private Thread ChangingOneLetter;
private Thread AddingOneLetter;
private Thread DeletingOneLetter;
private Thread ReversingTwoConsecutiveLetters;
public DidYouMean(final IndexCell<WordReference> index) { public DidYouMean(final IndexCell<WordReference> index) {
this.set = new HashSet<String>(); // this.set = Collections.synchronizedSortedSet(new TreeSet<String>(new wordSizeComparator()));
this.set = Collections.synchronizedSet(new HashSet<String>());
this.word = ""; this.word = "";
this.len = 0; this.len = 0;
this.index = index; this.index = index;
this.ChangingOneLetter = new ChangingOneLetter();
this.AddingOneLetter = new AddingOneLetter();
this.DeletingOneLetter = new DeletingOneLetter();
this.ReversingTwoConsecutiveLetters = new ReversingTwoConsecutiveLetters();
} }
public Set<String> getSuggestion(final String word) { public Set<String> getSuggestion(final String word) {
long startTime = System.currentTimeMillis();
this.word = word.toLowerCase(); this.word = word.toLowerCase();
this.len = word.length(); this.len = word.length();
this.ChangingOneLetter.start();
this.AddingOneLetter.start();
this.DeletingOneLetter.start();
this.ReversingTwoConsecutiveLetters.start();
ChangingOneLetter(); try {
AddingOneLetter(); this.ChangingOneLetter.join(TIMEOUT);
DeletingOneLetter(); this.AddingOneLetter.join(TIMEOUT);
ReversingTwoConsecutiveLetters(); this.DeletingOneLetter.join(TIMEOUT);
this.ReversingTwoConsecutiveLetters.join(TIMEOUT);
} catch (InterruptedException e) {
}
final Iterator<String> it = this.set.iterator(); this.set.remove(word.toLowerCase());
final TreeSet<String> rset = new TreeSet<String>(new wordSizeComparator()); Log.logInfo("DidYouMean", "found "+this.set.size()+" terms; execution time: "+(System.currentTimeMillis()-startTime)+"ms");
String s;
while(it.hasNext()) { return this.set;
s = it.next();
if (index.has(Word.word2hash(s))) {
rset.add(s);
}
}
rset.remove(word.toLowerCase());
return rset;
} }
private void ChangingOneLetter() { private class ChangingOneLetter extends Thread {
for(int i=0; i<this.len; i++) { public void run() {
for(int j=0; j<alphabet.length; j++) { String s;
this.set.add(this.word.substring(0, i) + alphabet[j] + this.word.substring(i+1)); int count = 0;
for(int i=0; i<len; i++) {
for(int j=0; j<alphabet.length; j++) {
s = word.substring(0, i) + alphabet[j] + word.substring(i+1);
if (index.has(Word.word2hash(s))) {
set.add(s);
count++;
}
}
} }
} }
} }
private void DeletingOneLetter() { private class DeletingOneLetter extends Thread {
for(int i=0; i<this.len;i++) { public void run() {
this.set.add(this.word.substring(0, i) + this.word.substring(i+1)); String s;
} int count = 0;
for(int i=0; i<len;i++) {
s = word.substring(0, i) + word.substring(i+1);
if (index.has(Word.word2hash(s))) {
set.add(s);
count++;
}
}
}
} }
private void AddingOneLetter() { private class AddingOneLetter extends Thread {
for(int i=0; i<this.len;i++) { public void run() {
for(int j=0; j<alphabet.length; j++) { String s;
this.set.add(this.word.substring(0, i) + alphabet[j] + this.word.substring(i)); int count = 0;
} for(int i=0; i<=len;i++) {
for(int j=0; j<alphabet.length; j++) {
s = word.substring(0, i) + alphabet[j] + word.substring(i);
if (index.has(Word.word2hash(s))) {
set.add(s);
count++;
}
}
}
} }
} }
private void ReversingTwoConsecutiveLetters() { private class ReversingTwoConsecutiveLetters extends Thread {
for(int i=0; i<this.word.length()-1; i++) { public void run() {
this.set.add(this.word.substring(0,i)+this.word.charAt(i+1)+this.word.charAt(i)+this.word.substring(i+2)); String s;
int count = 0;
for(int i=0; i<word.length()-1; i++) {
s = word.substring(0,i)+word.charAt(i+1)+word.charAt(i)+word.substring(i+2);
if (index.has(Word.word2hash(s))) {
set.add(s);
count++;
}
}
} }
} }
public class wordSizeComparator implements Comparator<String> { /*
private class wordSizeComparator implements Comparator<String> {
public int compare(final String o1, final String o2) { public int compare(final String o1, final String o2) {
final Integer i1 = index.count(Word.word2hash(o1)); final Integer i1 = index.count(Word.word2hash(o1));
final Integer i2 = index.count(Word.word2hash(o2)); final Integer i2 = index.count(Word.word2hash(o2));
return i2.compareTo(i1); return i2.compareTo(i1);
} }
} }
*/
} }

Loading…
Cancel
Save