diff --git a/source/net/yacy/data/DidYouMean.java b/source/net/yacy/data/DidYouMean.java index da281cb13..b38a5eff6 100644 --- a/source/net/yacy/data/DidYouMean.java +++ b/source/net/yacy/data/DidYouMean.java @@ -83,9 +83,8 @@ public class DidYouMean { private final Segment segment; private final StringBuilder word; private final int wordLen; - private final LinkedBlockingQueue guessGen, guessLib; + private final LinkedBlockingQueue guesses; private long timeLimit; - private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written private final SortedSet resultSet; private final indexSizeComparator INDEX_SIZE_COMPARATOR; private char[] alphabet; @@ -100,9 +99,7 @@ public class DidYouMean { this.word = word0; this.wordLen = this.word.length(); this.segment = segment; - this.guessGen = new LinkedBlockingQueue(); - this.guessLib = new LinkedBlockingQueue(); - this.createGen = true; + this.guesses = new LinkedBlockingQueue(); this.INDEX_SIZE_COMPARATOR = new indexSizeComparator(); this.more = segment.connectedRWI() && segment.RWICount() > 0; // with RWIs connected the guessing is super-fast @@ -147,8 +144,7 @@ public class DidYouMean { public void reset() { this.resultSet.clear(); - this.guessGen.clear(); - this.guessLib.clear(); + this.guesses.clear(); } /** @@ -171,12 +167,13 @@ public class DidYouMean { return getSuggestions(this.word.substring(0, lastIndexOfSpace), this.word.substring(lastIndexOfSpace + 1), timeout, preSortSelection, this.segment); } final SortedSet preSorted = getSuggestions(timeout); + /* if (System.currentTimeMillis() > timelimit) { ConcurrentLog.info("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: " + (System.currentTimeMillis() - startTime) + "ms"); return preSorted; } - +*/ final ReversibleScoreMap scored = new ClusteredScoreMap(StringBuilderComparator.CASE_INSENSITIVE_ORDER); try { for (final StringBuilder s: preSorted) { @@ -204,11 +201,13 @@ public class DidYouMean { } // finished + /* if (countSorted.isEmpty()) { ConcurrentLog.info("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (2); execution time: " + (System.currentTimeMillis() - startTime) + "ms"); return preSorted; } + */ ConcurrentLog.info("DidYouMean", "found " + preSorted.size() + " unsorted terms, returned " + countSorted.size() + " sorted suggestions; execution time: " + (System.currentTimeMillis() - startTime) + "ms"); @@ -225,12 +224,11 @@ public class DidYouMean { */ private static SortedSet getSuggestions(final String head, final String tail, final long timeout, final int preSortSelection, final Segment segment) { final SortedSet result = new TreeSet(StringBuilderComparator.CASE_INSENSITIVE_ORDER); - int count = 20; + int count = 30; final SolrQuery solrQuery = new SolrQuery(); solrQuery.setParam("defType", "edismax"); solrQuery.setFacet(false); - solrQuery.setQuery(CollectionSchema.title.getSolrFieldName() + ":\"" + head + "\"^10 OR " + CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + "\""); - if (tail.length() > 0) solrQuery.setFilterQueries(CollectionSchema.text_t.getSolrFieldName() + ":/.*" + head + " " + tail + ".*/"); + solrQuery.setQuery(CollectionSchema.title.getSolrFieldName() + ":\"" + head + "\"^10 OR " + CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + "\"" + (tail.length() == 0 ? "" : CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + " " + tail + "\"")); solrQuery.setStart(0); solrQuery.setRows(count); solrQuery.setHighlight(true); @@ -250,9 +248,15 @@ public class DidYouMean { for (Map> re: rawsnippets.values()) { for (List sl: re.values()) { for (String s: sl) { + s = s.replaceAll(" ", " "); int sp = s.indexOf(""); if (sp >= 0) { - s = s.substring(sp + 4); + if (tail.length() > 0) { + if (sp - tail.length() < 0 || !s.substring(sp - tail.length(), sp).equals(tail)) continue; + s = tail + s.substring(sp + 4); + } else { + s = s.substring(sp + 4); + } for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c < 'A') s = s.replace(c, ' '); @@ -267,11 +271,20 @@ public class DidYouMean { for (String x: sx) if (x.length() > 1 && sb.length() < 28) sb.append(x).append(' '); else break; s = sb.toString().trim(); int score = count; - for (String a: snippets) { - if (a.startsWith(s)) snippets.inc(a, count); - if (s.startsWith(a)) score += count; + if (s.length() > 2) { + boolean store = true; + for (String a: snippets) { + if (a.startsWith(s)) { + snippets.inc(a, count); + store = false; // s becomes superfluous + } + if (s.startsWith(a)) { + snippets.dec(a, count); // a becomes superfluous + score += count; + } + } + if (store) snippets.inc(s, score); } - if (sb.length() > 2) snippets.inc(s, score); count--; } } @@ -313,8 +326,7 @@ public class DidYouMean { for (final StringBuilder t: libr) { if (!t.equals(this.word)) { try { - this.createGen = false; - this.guessLib.put(t); + this.guesses.put(t); } catch (final InterruptedException e) {} } } @@ -350,21 +362,9 @@ public class DidYouMean { } catch (final InterruptedException e) {} } - // if there is not any entry in guessLib, then transfer all entries from the - // guessGen to guessLib - if (this.createGen) { - try { - this.guessGen.put(POISON_STRING); - StringBuilder s; - while (!(s = this.guessGen.take()).equals(POISON_STRING)) { - this.guessLib.put(s); - } - } catch (final InterruptedException e) {} - } - // put poison into guessLib to terminate consumers for (@SuppressWarnings("unused") final Consumer c: consumers) { - try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {} + try { this.guesses.put(POISON_STRING); } catch (final InterruptedException e) {} } // wait for termination of consumer @@ -373,6 +373,7 @@ public class DidYouMean { if (wait > 0) try { c.join(wait); } catch (final InterruptedException e) {} + if (c.isAlive()) c.interrupt(); } // we don't want the given word in the result @@ -385,15 +386,10 @@ public class DidYouMean { private void test(final StringBuilder s) throws InterruptedException { final Set libr = LibraryProvider.dymLib.recommend(s); libr.addAll(LibraryProvider.geoLoc.recommend(s)); - if (!libr.isEmpty()) { - this.createGen = false; - } for (final StringBuilder t: libr) { - this.guessLib.put(t); - } - if (this.createGen) { - this.guessGen.put(s); + this.guesses.put(t); } + this.guesses.add(s); } /** @@ -501,8 +497,8 @@ public class DidYouMean { public void run() { StringBuilder s; try { - while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) { - if (s.length() >= MinimumOutputWordLength && DidYouMean.this.segment.getWordCountGuess(s.toString()) > 2) { + while ((s = DidYouMean.this.guesses.take()) != POISON_STRING) { + if (s.length() >= MinimumOutputWordLength) { DidYouMean.this.resultSet.add(s); } if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {