|
|
|
@ -1,6 +1,8 @@
|
|
|
|
|
package net.yacy.data;
|
|
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Collection;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.Comparator;
|
|
|
|
|
import java.util.ConcurrentModificationException;
|
|
|
|
@ -10,7 +12,6 @@ import java.util.Map;
|
|
|
|
|
import java.util.Set;
|
|
|
|
|
import java.util.SortedSet;
|
|
|
|
|
import java.util.TreeSet;
|
|
|
|
|
import java.util.concurrent.LinkedBlockingQueue;
|
|
|
|
|
|
|
|
|
|
import org.apache.solr.client.solrj.SolrQuery;
|
|
|
|
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
|
|
|
@ -76,17 +77,14 @@ public class DidYouMean {
|
|
|
|
|
private static final char[][] ALPHABETS = {
|
|
|
|
|
ALPHABET_LATIN, ALPHABET_KANJI, ALPHABET_HIRAGANA, ALPHABET_KATAKANA,
|
|
|
|
|
ALPHABET_CJK_UNIFIED_IDEOGRAPHS_Part1, ALPHABET_CJK_UNIFIED_IDEOGRAPHS_Part2, ALPHABET_CJK_UNIFIED_IDEOGRAPHS_Part3, ALPHABET_CJK_UNIFIED_IDEOGRAPHS_Part4};
|
|
|
|
|
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
|
|
|
|
|
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
|
|
|
|
|
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
|
|
|
|
|
|
|
|
|
|
private final Segment segment;
|
|
|
|
|
private final StringBuilder word;
|
|
|
|
|
private final int wordLen;
|
|
|
|
|
private final LinkedBlockingQueue<StringBuilder> guesses;
|
|
|
|
|
private long timeLimit;
|
|
|
|
|
private final SortedSet<StringBuilder> resultSet;
|
|
|
|
|
private final indexSizeComparator INDEX_SIZE_COMPARATOR;
|
|
|
|
|
private char[] alphabet;
|
|
|
|
|
private boolean more;
|
|
|
|
|
|
|
|
|
@ -99,8 +97,6 @@ public class DidYouMean {
|
|
|
|
|
this.word = word0;
|
|
|
|
|
this.wordLen = this.word.length();
|
|
|
|
|
this.segment = segment;
|
|
|
|
|
this.guesses = new LinkedBlockingQueue<StringBuilder>();
|
|
|
|
|
this.INDEX_SIZE_COMPARATOR = new indexSizeComparator();
|
|
|
|
|
this.more = segment.connectedRWI() && segment.RWICount() > 0; // with RWIs connected the guessing is super-fast
|
|
|
|
|
|
|
|
|
|
// identify language
|
|
|
|
@ -144,7 +140,6 @@ public class DidYouMean {
|
|
|
|
|
|
|
|
|
|
public void reset() {
|
|
|
|
|
this.resultSet.clear();
|
|
|
|
|
this.guesses.clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -155,7 +150,7 @@ public class DidYouMean {
|
|
|
|
|
* @param preSortSelection the number of words that participate in the IO-intensive sort
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
|
|
|
|
|
public Collection<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
|
|
|
|
|
if (this.word.length() < MinimumInputWordLength) {
|
|
|
|
|
return this.resultSet; // return nothing if input is too short
|
|
|
|
|
}
|
|
|
|
@ -167,47 +162,33 @@ public class DidYouMean {
|
|
|
|
|
return getSuggestions(this.word.substring(0, lastIndexOfSpace), this.word.substring(lastIndexOfSpace + 1), timeout, preSortSelection, this.segment);
|
|
|
|
|
}
|
|
|
|
|
final SortedSet<StringBuilder> preSorted = getSuggestions(timeout);
|
|
|
|
|
/*
|
|
|
|
|
if (System.currentTimeMillis() > timelimit) {
|
|
|
|
|
ConcurrentLog.info("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
|
|
|
|
|
+ (System.currentTimeMillis() - startTime) + "ms");
|
|
|
|
|
return preSorted;
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
|
|
|
|
|
try {
|
|
|
|
|
for (final StringBuilder s: preSorted) {
|
|
|
|
|
if (System.currentTimeMillis() > timelimit) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (!(scored.sizeSmaller(2 * preSortSelection))) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
scored.inc(s, this.segment.getWordCountGuess(s.toString()));
|
|
|
|
|
}
|
|
|
|
|
} catch (final ConcurrentModificationException e) {
|
|
|
|
|
}
|
|
|
|
|
final SortedSet<StringBuilder> countSorted = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
|
|
|
|
|
final int wc = this.segment.getWordCountGuess(this.word.toString()); // all counts must be greater than this
|
|
|
|
|
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
|
|
|
|
|
final StringBuilder s = scored.getMaxKey();
|
|
|
|
|
final int score = scored.delete(s);
|
|
|
|
|
if (s.length() >= MinimumOutputWordLength && score > wc) {
|
|
|
|
|
countSorted.add(s);
|
|
|
|
|
Collection<StringBuilder> countSorted = new ArrayList<StringBuilder>();
|
|
|
|
|
if (this.more) {
|
|
|
|
|
final int wc = this.segment.getWordCountGuess(this.word.toString()); // all counts must be greater than this
|
|
|
|
|
try {
|
|
|
|
|
for (final StringBuilder s: preSorted) {
|
|
|
|
|
if (System.currentTimeMillis() > timelimit) break;
|
|
|
|
|
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
|
|
|
|
|
String s0 = s.toString();
|
|
|
|
|
int wcg = s0.indexOf(' ') > 0 ? s0.length() * 100 : this.segment.getWordCountGuess(s0);
|
|
|
|
|
if (wcg > wc) scored.inc(s, wcg);
|
|
|
|
|
}
|
|
|
|
|
} catch (final ConcurrentModificationException e) {
|
|
|
|
|
}
|
|
|
|
|
if (System.currentTimeMillis() > timelimit) {
|
|
|
|
|
break;
|
|
|
|
|
Iterator<StringBuilder> i = scored.keys(false);
|
|
|
|
|
while (i.hasNext()) countSorted.add(i.next());
|
|
|
|
|
} else {
|
|
|
|
|
try {
|
|
|
|
|
for (final StringBuilder s: preSorted) {
|
|
|
|
|
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(s, this.word) ||
|
|
|
|
|
StringBuilderComparator.CASE_INSENSITIVE_ORDER.endsWith(this.word, s)) countSorted.add(s);
|
|
|
|
|
}
|
|
|
|
|
} catch (final ConcurrentModificationException e) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// finished
|
|
|
|
|
/*
|
|
|
|
|
if (countSorted.isEmpty()) {
|
|
|
|
|
ConcurrentLog.info("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (2); execution time: "
|
|
|
|
|
+ (System.currentTimeMillis() - startTime) + "ms");
|
|
|
|
|
return preSorted;
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
ConcurrentLog.info("DidYouMean", "found " + preSorted.size() + " unsorted terms, returned " + countSorted.size() + " sorted suggestions; execution time: "
|
|
|
|
|
+ (System.currentTimeMillis() - startTime) + "ms");
|
|
|
|
|
|
|
|
|
@ -222,7 +203,7 @@ public class DidYouMean {
|
|
|
|
|
* @param preSortSelection
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
private static SortedSet<StringBuilder> getSuggestions(final String head, final String tail, final long timeout, final int preSortSelection, final Segment segment) {
|
|
|
|
|
private static Collection<StringBuilder> getSuggestions(final String head, final String tail, final long timeout, final int preSortSelection, final Segment segment) {
|
|
|
|
|
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
|
|
|
|
|
int count = 30;
|
|
|
|
|
final SolrQuery solrQuery = new SolrQuery();
|
|
|
|
@ -239,7 +220,6 @@ public class DidYouMean {
|
|
|
|
|
solrQuery.addHighlightField(CollectionSchema.title.getSolrFieldName());
|
|
|
|
|
solrQuery.addHighlightField(CollectionSchema.text_t.getSolrFieldName());
|
|
|
|
|
solrQuery.setFields(); // no fields wanted! only snippets
|
|
|
|
|
//List<String> snippets = new ArrayList<String>();
|
|
|
|
|
OrderedScoreMap<String> snippets = new OrderedScoreMap<String>(null);
|
|
|
|
|
try {
|
|
|
|
|
QueryResponse response = segment.fulltext().getDefaultConnector().getResponseByParams(solrQuery);
|
|
|
|
@ -313,108 +293,68 @@ public class DidYouMean {
|
|
|
|
|
private SortedSet<StringBuilder> getSuggestions(final long timeout) {
|
|
|
|
|
final long startTime = System.currentTimeMillis();
|
|
|
|
|
this.timeLimit = startTime + timeout;
|
|
|
|
|
|
|
|
|
|
// create one consumer thread that checks the guessLib queue
|
|
|
|
|
// for occurrences in the index. If the producers are started next, their
|
|
|
|
|
// results can be consumers directly
|
|
|
|
|
final Consumer[] consumers = new Consumer[AVAILABLE_CPU];
|
|
|
|
|
consumers[0] = new Consumer();
|
|
|
|
|
consumers[0].start();
|
|
|
|
|
|
|
|
|
|
// get a single recommendation for the word without altering the word
|
|
|
|
|
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(this.word);
|
|
|
|
|
for (final StringBuilder t: libr) {
|
|
|
|
|
if (!t.equals(this.word)) {
|
|
|
|
|
try {
|
|
|
|
|
this.guesses.put(t);
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
|
|
|
|
|
Thread[] producers = null;
|
|
|
|
|
if (this.more) {
|
|
|
|
|
// create and start producers
|
|
|
|
|
// the CPU load to create the guessed words is very low, but the testing
|
|
|
|
|
// against the library may be CPU intensive. Since it is possible to test
|
|
|
|
|
// words in the library concurrently, it is a good idea to start separate threads
|
|
|
|
|
producers = new Thread[4];
|
|
|
|
|
producers[0] = new ChangingOneLetter();
|
|
|
|
|
producers[1] = new AddingOneLetter();
|
|
|
|
|
producers[2] = new DeletingOneLetter();
|
|
|
|
|
producers[3] = new ReversingTwoConsecutiveLetters();
|
|
|
|
|
for (final Thread t: producers) {
|
|
|
|
|
t.start();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// create and start producers
|
|
|
|
|
// the CPU load to create the guessed words is very low, but the testing
|
|
|
|
|
// against the library may be CPU intensive. Since it is possible to test
|
|
|
|
|
// words in the library concurrently, it is a good idea to start separate threads
|
|
|
|
|
final Thread[] producers = new Thread[4];
|
|
|
|
|
producers[0] = new ChangingOneLetter();
|
|
|
|
|
producers[1] = new AddingOneLetter();
|
|
|
|
|
producers[2] = new DeletingOneLetter();
|
|
|
|
|
producers[3] = new ReversingTwoConsecutiveLetters();
|
|
|
|
|
for (final Thread t: producers) {
|
|
|
|
|
t.start();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// start more consumers if there are more cores
|
|
|
|
|
if (consumers.length > 1) {
|
|
|
|
|
for (int i = 1; i < consumers.length; i++) {
|
|
|
|
|
consumers[i] = new Consumer();
|
|
|
|
|
consumers[i].start();
|
|
|
|
|
test(this.word);
|
|
|
|
|
this.resultSet.addAll(getSuggestions(this.word.toString(), "", timeout, 10, this.segment));
|
|
|
|
|
|
|
|
|
|
if (this.more) {
|
|
|
|
|
// finish the producer
|
|
|
|
|
for (final Thread t: producers) {
|
|
|
|
|
long wait = this.timeLimit - System.currentTimeMillis();
|
|
|
|
|
if (wait > 0) try {
|
|
|
|
|
t.join(wait);
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// now decide which kind of guess is better
|
|
|
|
|
// we take guessLib entries as long as there is any entry in it
|
|
|
|
|
// to see if this is the case, we must wait for termination of the producer
|
|
|
|
|
for (final Thread t: producers) {
|
|
|
|
|
long wait = this.timeLimit - System.currentTimeMillis();
|
|
|
|
|
if (wait > 0) try {
|
|
|
|
|
t.join(wait);
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// put poison into guessLib to terminate consumers
|
|
|
|
|
for (@SuppressWarnings("unused") final Consumer c: consumers) {
|
|
|
|
|
try { this.guesses.put(POISON_STRING); } catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// wait for termination of consumer
|
|
|
|
|
for (final Consumer c: consumers) {
|
|
|
|
|
long wait = this.timeLimit - System.currentTimeMillis();
|
|
|
|
|
if (wait > 0) try {
|
|
|
|
|
c.join(wait);
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
if (c.isAlive()) c.interrupt();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// we don't want the given word in the result
|
|
|
|
|
this.resultSet.remove(this.word);
|
|
|
|
|
|
|
|
|
|
return this.resultSet;
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private void test(final StringBuilder s) throws InterruptedException {
|
|
|
|
|
private void test(final StringBuilder s) {
|
|
|
|
|
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(s);
|
|
|
|
|
libr.addAll(LibraryProvider.geoLoc.recommend(s));
|
|
|
|
|
for (final StringBuilder t: libr) {
|
|
|
|
|
this.guesses.put(t);
|
|
|
|
|
if (t.length() >= MinimumOutputWordLength) this.resultSet.add(t);
|
|
|
|
|
}
|
|
|
|
|
this.guesses.add(s);
|
|
|
|
|
if (s.length() >= MinimumOutputWordLength) this.resultSet.add(s);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* DidYouMean's producer thread that changes one letter (e.g. bat/cat) for a given term
|
|
|
|
|
* based on the given alphabet and puts it on the blocking queue, to be 'consumed' by a consumer thread.<p/>
|
|
|
|
|
* <b>Note:</b> the loop runs (alphabet.length * len) tests.
|
|
|
|
|
*/
|
|
|
|
|
public class ChangingOneLetter extends Thread {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
char m;
|
|
|
|
|
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
|
|
|
|
|
try {
|
|
|
|
|
m = DidYouMean.this.word.charAt(i);
|
|
|
|
|
for (final char c: DidYouMean.this.alphabet) {
|
|
|
|
|
if (m != c) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
|
|
|
|
|
test(ts);
|
|
|
|
|
}
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
m = DidYouMean.this.word.charAt(i);
|
|
|
|
|
for (final char c: DidYouMean.this.alphabet) {
|
|
|
|
|
if (m != c) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
|
|
|
|
|
test(ts);
|
|
|
|
|
}
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -425,20 +365,14 @@ public class DidYouMean {
|
|
|
|
|
* <b>Note:</b> the loop runs (len) tests.
|
|
|
|
|
*/
|
|
|
|
|
private class DeletingOneLetter extends Thread {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
|
|
|
|
|
try {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -447,21 +381,16 @@ public class DidYouMean {
|
|
|
|
|
* <b>Note:</b> the loop runs (alphabet.length * len) tests.
|
|
|
|
|
*/
|
|
|
|
|
private class AddingOneLetter extends Thread {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
|
|
|
|
|
try {
|
|
|
|
|
for (final char c: DidYouMean.this.alphabet) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
|
|
|
|
|
for (final char c: DidYouMean.this.alphabet) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -470,68 +399,21 @@ public class DidYouMean {
|
|
|
|
|
* <b>Note:</b> the loop runs (len-1) tests.
|
|
|
|
|
*/
|
|
|
|
|
private class ReversingTwoConsecutiveLetters extends Thread {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
|
|
|
|
|
try {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* DidYouMean's consumer thread takes a String object (term) from the blocking queue
|
|
|
|
|
* and checks if it is contained in YaCy's RWI index.
|
|
|
|
|
* <b>Note:</b> this causes no or moderate i/o as it uses the efficient index.has() method.
|
|
|
|
|
*/
|
|
|
|
|
private class Consumer extends Thread {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
StringBuilder s;
|
|
|
|
|
try {
|
|
|
|
|
while ((s = DidYouMean.this.guesses.take()) != POISON_STRING) {
|
|
|
|
|
if (s.length() >= MinimumOutputWordLength) {
|
|
|
|
|
DidYouMean.this.resultSet.add(s);
|
|
|
|
|
}
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} catch (final InterruptedException e) {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* indexSizeComparator is used by DidYouMean to order terms by index.count()
|
|
|
|
|
* <b>Warning:</b> this causes heavy i/o
|
|
|
|
|
*/
|
|
|
|
|
private class indexSizeComparator implements Comparator<StringBuilder> {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public int compare(final StringBuilder o1, final StringBuilder o2) {
|
|
|
|
|
final int i1 = DidYouMean.this.segment.getWordCountGuess(o1.toString());
|
|
|
|
|
final int i2 = DidYouMean.this.segment.getWordCountGuess(o2.toString());
|
|
|
|
|
if (i1 == i2) {
|
|
|
|
|
return WORD_LENGTH_COMPARATOR.compare(o1, o2);
|
|
|
|
|
public void run() {
|
|
|
|
|
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
|
|
|
|
|
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
|
|
|
|
|
test(ts);
|
|
|
|
|
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
|
|
|
|
}
|
|
|
|
|
return (i1 < i2) ? 1 : -1; // '<' is correct, because the largest count shall be ordered to be the first position in the result
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* wordLengthComparator is used by DidYouMean to order terms by the term length
|
|
|
|
|
* This is the default order if the indexSizeComparator is not used
|
|
|
|
|
*/
|
|
|
|
|
private static class wordLengthComparator implements Comparator<StringBuilder> {
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public int compare(final StringBuilder o1, final StringBuilder o2) {
|
|
|
|
|
final int i1 = o1.length();
|
|
|
|
@ -541,7 +423,6 @@ public class DidYouMean {
|
|
|
|
|
}
|
|
|
|
|
return (i1 < i2) ? 1 : -1; // '<' is correct, because the longest word shall be first
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|