From 354b976110cb6563665d11a607e75d11f18cf3e5 Mon Sep 17 00:00:00 2001 From: Michael Christen Date: Thu, 5 Jan 2012 08:35:44 +0100 Subject: [PATCH] fix for concurrency problem and endless loop in /suggest.json --- source/de/anomic/data/DidYouMean.java | 216 +++++++++++++++++--------- 1 file changed, 146 insertions(+), 70 deletions(-) diff --git a/source/de/anomic/data/DidYouMean.java b/source/de/anomic/data/DidYouMean.java index e6ae07c8f..a15b1c339 100644 --- a/source/de/anomic/data/DidYouMean.java +++ b/source/de/anomic/data/DidYouMean.java @@ -48,14 +48,17 @@ public class DidYouMean { private static final char[] ALPHABET_KANJI = new char[512]; static { // this is very experimental: a very small subset of Kanji - for (char a = '\u3400'; a <= '\u34ff'; a++) ALPHABET_KANJI[0xff & (a - '\u3400')] = a; - for (char a = '\u4e00'; a <= '\u4eff'; a++) ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a; + for (char a = '\u3400'; a <= '\u34ff'; a++) { + ALPHABET_KANJI[0xff & (a - '\u3400')] = a; + } + for (char a = '\u4e00'; a <= '\u4eff'; a++) { + ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a; + } } - private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI}; - private static char[] alphabet = ALPHABET_LATIN; - private static final StringBuilder POISON_STRING = new StringBuilder("\n"); - public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors(); + private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI}; + private static final StringBuilder POISON_STRING = new StringBuilder("\n"); + public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors(); private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator(); private final IndexCell index; @@ -66,6 +69,7 @@ public class DidYouMean { private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written private final SortedSet resultSet; private final indexSizeComparator INDEX_SIZE_COMPARATOR; + private char[] alphabet; /** @@ -88,25 +92,31 @@ public class DidYouMean { boolean alphafound = false; alphatest: for (final char[] alpha: ALPHABETS) { if (isAlphabet(alpha, testchar)) { - alphabet = alpha; + this.alphabet = new char[alpha.length]; + System.arraycopy(ALPHABET_LATIN, 0, this.alphabet, 0, alpha.length); alphafound = true; break alphatest; } } if (!alphafound) { // generate generic alphabet using simply a character block of 256 characters - final char firstchar = (char) ((0xff & (testchar / 256)) * 256); - final char lastchar = (char) (firstchar + 255); - alphabet = new char[256]; - for (char a = firstchar; a <= lastchar; a++) { - alphabet[0xff & (a - firstchar)] = a; + final int firstchar = (0xff & (testchar / 256)) * 256; + final int lastchar = firstchar + 255; + this.alphabet = new char[256]; + // test this with /suggest.json?q=%EF%BD%84 + for (int a = firstchar; a <= lastchar; a++) { + this.alphabet[0xff & (a - firstchar)] = (char) a; } } } } private static final boolean isAlphabet(final char[] alpha, final char testchar) { - for (final char a: alpha) if (a == testchar) return true; + for (final char a: alpha) { + if (a == testchar) { + return true; + } + } return false; } @@ -125,10 +135,15 @@ public class DidYouMean { * @return */ public SortedSet getSuggestions(final long timeout, final int preSortSelection) { - if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short + if (this.word.length() < MinimumInputWordLength) + { + return this.resultSet; // return nothing if input is too short + } final long startTime = System.currentTimeMillis(); final long timelimit = startTime + timeout; - if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index); + if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) { + return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index); + } final SortedSet preSorted = getSuggestions(timeout); if (System.currentTimeMillis() > timelimit) { Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: " @@ -138,8 +153,12 @@ public class DidYouMean { final ReversibleScoreMap scored = new ClusteredScoreMap(StringBuilderComparator.CASE_INSENSITIVE_ORDER); for (final StringBuilder s: preSorted) { - if (System.currentTimeMillis() > timelimit) break; - if (!(scored.sizeSmaller(2 * preSortSelection))) break; + if (System.currentTimeMillis() > timelimit) { + break; + } + if (!(scored.sizeSmaller(2 * preSortSelection))) { + break; + } scored.inc(s, this.index.count(Word.word2hash(s))); } final SortedSet countSorted = Collections.synchronizedSortedSet(new TreeSet(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR))); @@ -147,8 +166,12 @@ public class DidYouMean { while (!scored.isEmpty() && countSorted.size() < preSortSelection) { final StringBuilder s = scored.getMaxKey(); final int score = scored.delete(s); - if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s); - if (System.currentTimeMillis() > timelimit) break; + if (s.length() >= MinimumOutputWordLength && score > wc) { + countSorted.add(s); + } + if (System.currentTimeMillis() > timelimit) { + break; + } } // finished @@ -180,11 +203,19 @@ public class DidYouMean { final SortedSet result = new TreeSet(StringBuilderComparator.CASE_INSENSITIVE_ORDER); StringBuilder sb; for (int i = 0; i < words.length; i++) { - if (s[i].isEmpty()) continue; + if (s[i].isEmpty()) { + continue; + } sb = new StringBuilder(20); for (int j = 0; j < words.length; j++) { - if (j > 0) sb.append(' '); - if (i == j) sb.append(s[j].first()); else sb.append(words[j]); + if (j > 0) { + sb.append(' '); + } + if (i == j) { + sb.append(s[j].first()); + } else { + sb.append(words[j]); + } } result.add(sb); } @@ -211,10 +242,12 @@ public class DidYouMean { // get a single recommendation for the word without altering the word final Set libr = LibraryProvider.dymLib.recommend(this.word); for (final StringBuilder t: libr) { - if (!t.equals(this.word)) try { - this.createGen = false; - this.guessLib.put(t); - } catch (final InterruptedException e) {} + if (!t.equals(this.word)) { + try { + this.createGen = false; + this.guessLib.put(t); + } catch (final InterruptedException e) {} + } } // create and start producers @@ -226,34 +259,46 @@ public class DidYouMean { producers[1] = new AddingOneLetter(); producers[2] = new DeletingOneLetter(); producers[3] = new ReversingTwoConsecutiveLetters(); - for (final Thread t: producers) t.start(); + for (final Thread t: producers) { + t.start(); + } // start more consumers if there are more cores - if (consumers.length > 1) for (int i = 1; i < consumers.length; i++) { - consumers[i] = new Consumer(); - consumers[i].start(); + if (consumers.length > 1) { + for (int i = 1; i < consumers.length; i++) { + consumers[i] = new Consumer(); + consumers[i].start(); + } } // now decide which kind of guess is better // we take guessLib entries as long as there is any entry in it // to see if this is the case, we must wait for termination of the producer - for (final Thread t: producers) try { t.join(); } catch (final InterruptedException e) {} + for (final Thread t: producers) { + try { t.join(); } catch (final InterruptedException e) {} + } // if there is not any entry in guessLib, then transfer all entries from the // guessGen to guessLib - if (this.createGen) try { - this.guessGen.put(POISON_STRING); - StringBuilder s; - while (!(s = this.guessGen.take()).equals(POISON_STRING)) this.guessLib.put(s); - } catch (final InterruptedException e) {} + if (this.createGen) { + try { + this.guessGen.put(POISON_STRING); + StringBuilder s; + while (!(s = this.guessGen.take()).equals(POISON_STRING)) { + this.guessLib.put(s); + } + } catch (final InterruptedException e) {} + } // put poison into guessLib to terminate consumers - for (@SuppressWarnings("unused") final Consumer c: consumers) + for (@SuppressWarnings("unused") final Consumer c: consumers) { try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {} + } // wait for termination of consumer - for (final Consumer c: consumers) + for (final Consumer c: consumers) { try { c.join(); } catch (final InterruptedException e) {} + } // we don't want the given word in the result this.resultSet.remove(this.word); @@ -265,7 +310,9 @@ public class DidYouMean { private void test(final StringBuilder s) throws InterruptedException { final Set libr = LibraryProvider.dymLib.recommend(s); libr.addAll(LibraryProvider.geoLoc.recommend(s)); - if (!libr.isEmpty()) this.createGen = false; + if (!libr.isEmpty()) { + this.createGen = false; + } for (final StringBuilder t: libr) { this.guessLib.put(t); } @@ -284,16 +331,20 @@ public class DidYouMean { @Override public void run() { char m; - for (int i = 0; i < DidYouMean.this.wordLen; i++) try { - m = DidYouMean.this.word.charAt(i); - for (final char c: alphabet) { - if (m != c) { - final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1)); - test(ts); + for (int i = 0; i < DidYouMean.this.wordLen; i++) { + try { + m = DidYouMean.this.word.charAt(i); + for (final char c: DidYouMean.this.alphabet) { + if (m != c) { + final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1)); + test(ts); + } + if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { + return; + } } - if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return; - } - } catch (final InterruptedException e) {} + } catch (final InterruptedException e) {} + } } } @@ -306,11 +357,15 @@ public class DidYouMean { @Override public void run() { - for (int i = 0; i < DidYouMean.this.wordLen; i++) try { - final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1)); - test(ts); - if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return; - } catch (final InterruptedException e) {} + for (int i = 0; i < DidYouMean.this.wordLen; i++) { + try { + final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1)); + test(ts); + if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { + return; + } + } catch (final InterruptedException e) {} + } } } @@ -324,13 +379,17 @@ public class DidYouMean { @Override public void run() { - for (int i = 0; i <= DidYouMean.this.wordLen; i++) try { - for (final char c: alphabet) { - final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i)); - test(ts); - if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return; - } - } catch (final InterruptedException e) {} + for (int i = 0; i <= DidYouMean.this.wordLen; i++) { + try { + for (final char c: DidYouMean.this.alphabet) { + final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i)); + test(ts); + if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { + return; + } + } + } catch (final InterruptedException e) {} + } } } @@ -343,11 +402,15 @@ public class DidYouMean { @Override public void run() { - for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) try { - final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2)); - test(ts); - if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return; - } catch (final InterruptedException e) {} + for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) { + try { + final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2)); + test(ts); + if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { + return; + } + } catch (final InterruptedException e) {} + } } } @@ -364,8 +427,12 @@ public class DidYouMean { StringBuilder s; try { while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) { - if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) DidYouMean.this.resultSet.add(s); - if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return; + if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) { + DidYouMean.this.resultSet.add(s); + } + if (System.currentTimeMillis() > DidYouMean.this.timeLimit) { + return; + } } } catch (final InterruptedException e) {} } @@ -377,10 +444,13 @@ public class DidYouMean { */ private class indexSizeComparator implements Comparator { + @Override public int compare(final StringBuilder o1, final StringBuilder o2) { final int i1 = DidYouMean.this.index.count(Word.word2hash(o1)); final int i2 = DidYouMean.this.index.count(Word.word2hash(o2)); - if (i1 == i2) return WORD_LENGTH_COMPARATOR.compare(o1, o2); + if (i1 == i2) { + return WORD_LENGTH_COMPARATOR.compare(o1, o2); + } return (i1 < i2) ? 1 : -1; // '<' is correct, because the largest count shall be ordered to be the first position in the result } } @@ -391,10 +461,13 @@ public class DidYouMean { */ private static class wordLengthComparator implements Comparator { + @Override public int compare(final StringBuilder o1, final StringBuilder o2) { final int i1 = o1.length(); final int i2 = o2.length(); - if (i1 == i2) return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2); + if (i1 == i2) { + return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2); + } return (i1 < i2) ? 1 : -1; // '<' is correct, because the longest word shall be first } @@ -411,10 +484,13 @@ public class DidYouMean { this.secondaryComparator = secondaryComparator; } + @Override public int compare(final StringBuilder o1, final StringBuilder o2) { final boolean o1m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o1, this.head); final boolean o2m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o2, this.head); - if ((o1m && o2m) || (!o1m && !o2m)) return this.secondaryComparator.compare(o1, o2); + if ((o1m && o2m) || (!o1m && !o2m)) { + return this.secondaryComparator.compare(o1, o2); + } return o1m ? -1 : 1; } }