diff --git a/source/de/anomic/tools/DidYouMean.java b/source/de/anomic/tools/DidYouMean.java
index cab32cf41..ab7e984e5 100644
--- a/source/de/anomic/tools/DidYouMean.java
+++ b/source/de/anomic/tools/DidYouMean.java
@@ -1,8 +1,10 @@
package de.anomic.tools;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
+import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue;
import de.anomic.kelondro.text.IndexCell;
@@ -10,19 +12,26 @@ import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.plasma.parser.Word;
import de.anomic.yacy.logging.Log;
-// People make mistakes when they type words.
-// The most common mistakes are the four categories listed below:
-// (1) Changing one letter: bat / cat;
-// (2) Adding one letter: bat / boat;
-// (3) Deleting one letter: frog / fog; or
-// (4) Reversing two consecutive letters: two / tow.
-
+/**
+ * People make mistakes when they type words.
+ * The most common mistakes are the four categories listed below:
+ *
+ * - Changing one letter: bat / cat;
+ * - Adding one letter: bat / boat;
+ * - Deleting one letter: frog / fog; or
+ * - Reversing two consecutive letters: two / tow.
+ *
+ * DidYouMean provides producer threads that feed a blocking queue with word variations according to
+ * the four categories mentioned above. Consumer threads then check the generated word variations against a term index.
+ * Only words contained in the term index are returned by the getSuggestion method.
+ * @author apfelmaennchen
+ */
public class DidYouMean {
private static final char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
'q','r','s','t','u','v','w','x','y','z','\u00e4','\u00f6','\u00fc','\u00df'};
- private static final long TIMEOUT = 500;
+ public static final int availableCPU = Runtime.getRuntime().availableProcessors();
final LinkedBlockingQueue queue = new LinkedBlockingQueue();
private final Set set;
@@ -30,20 +39,52 @@ public class DidYouMean {
private String word;
private int len;
- public DidYouMean(final IndexCell index) {
- // this.set = Collections.synchronizedSortedSet(new TreeSet(new wordSizeComparator()));
- this.set = Collections.synchronizedSet(new HashSet());
+
+ /**
+ * @param index a termIndex - most likely retrieved from a switchboard object.
+ * @param sort true/false - sorts the resulting TreeSet by index.count(); Warning: this causes heavy i/o.
+ */
+ public DidYouMean(final IndexCell index, boolean sort) {
+ if(sort)
+ this.set = Collections.synchronizedSortedSet(new TreeSet(new wordSizeComparator()));
+ else
+ this.set = Collections.synchronizedSet(new HashSet());
this.word = "";
this.len = 0;
this.index = index;
}
+ /**
+ * @param index a termIndex - most likely retrieved from a switchboard object.
+ */
+ public DidYouMean(final IndexCell index) {
+ this(index, false);
+ }
+
+ /**
+ * This method triggers the 4 producer threads and the consumer threads (one per available processor) of DidYouMean.
+ * Note: the default timeout is 500ms
+ * @param word a String with a single word
+ * @return a Set<String> with word variations contained in index.
+ */
public Set getSuggestion(final String word) {
+ return getSuggestion(word, 500);
+ }
+
+ /**
+ * This method triggers the 4 producer threads and the consumer threads (one per available processor) of the DidYouMean object.
+ * @param word a String with a single word
+ * @param timeout execution time in ms.
+ * @return a Set<String> with word variations contained in term index.
+ */
+ public Set getSuggestion(final String word, long timeout) {
long startTime = System.currentTimeMillis();
this.word = word.toLowerCase();
this.len = word.length();
// create producers
+ // the intention of the 4 producers is to mix results, as there
+ // is currently no default sorting or ranking due to the i/o performance of index.count()
Thread[] producers = new Thread[4];
producers[0] = new ChangingOneLetter();
producers[1] = new AddingOneLetter();
@@ -55,8 +96,8 @@ public class DidYouMean {
producers[i].start();
}
- // create and start 8 consumers threads
- Thread[] consumers = new Thread[8];
+ // create and start consumer threads
+ Thread[] consumers = new Thread[availableCPU];
for (int i=0; i
+ * Note: the loop runs (alphabet.length * len) tests.
+ */
private class ChangingOneLetter extends Thread {
- // tests: alphabet.length * len
public void run() {
String s;
for(int i=0; i
+ * Note: the loop runs (len) tests.
+ */
private class DeletingOneLetter extends Thread {
- // tests: len
public void run() {
String s;
for(int i=0; i
+ * Note: the loop runs (alphabet.length * len) tests.
+ */
private class AddingOneLetter extends Thread {
- // tests: alphabet.length * len
public void run() {
String s;
for(int i=0; i<=len;i++) {
@@ -153,10 +196,13 @@ public class DidYouMean {
}
}
}
-
+ /**
+ * DidYouMean's producer thread that reverses any two consecutive letters (e.g. two/tow) for a given term
+ * and puts it on the blocking queue, to be 'consumed' by a consumer thread.
+ * Note: the loop runs (len-1) tests.
+ */
private class ReversingTwoConsecutiveLetters extends Thread {
- // tests: (len - 1)
public void run() {
String s;
for(int i=0; i
+ * Note: this causes no or moderate i/o as it uses the efficient index.has() method.
+ */
class Consumer extends Thread {
public void run() {
@@ -190,9 +240,11 @@ public class DidYouMean {
set.add(s);
}
}
- }
-
- /*
+ }
+ /**
+ * wordSizeComparator is used by DidYouMean to order terms by index.count()
+ * Warning: this causes heavy i/o
+ */
private class wordSizeComparator implements Comparator {
public int compare(final String o1, final String o2) {
final Integer i1 = index.count(Word.word2hash(o1));
@@ -200,7 +252,7 @@ public class DidYouMean {
return i2.compareTo(i1);
}
}
- */
+
}