Adjusted suggestions timeout management

* less CPU usage by using the Solr 'timeAllowed' parameter
* increase the chances of getting some results even when a first operation step
times out, by leaving some time for the final processing of the snippet
results
pull/137/head
luccioman 7 years ago
parent d5af160e60
commit 84d82bfdd7

@ -45,6 +45,9 @@ import net.yacy.search.schema.CollectionSchema;
*/ */
public class DidYouMean { public class DidYouMean {
/** Logs handler */
private static final ConcurrentLog logger = new ConcurrentLog("DidYouMean");
private static final int MinimumInputWordLength = 2; private static final int MinimumInputWordLength = 2;
private static final int MinimumOutputWordLength = 4; private static final int MinimumOutputWordLength = 4;
@ -150,7 +153,7 @@ public class DidYouMean {
* get suggestions for a given word. The result is first ordered using a term size ordering, * get suggestions for a given word. The result is first ordered using a term size ordering,
* and a subset of the result is sorted again with a IO-intensive order based on the index size * and a subset of the result is sorted again with a IO-intensive order based on the index size
* @param word0 * @param word0
* @param timeout * @param timeout maximum time (in milliseconds) allowed for processing suggestions. A negative value means no limit.
* @param preSortSelection the number of words that participate in the IO-intensive sort * @param preSortSelection the number of words that participate in the IO-intensive sort
* @return * @return
*/ */
@ -159,26 +162,32 @@ public class DidYouMean {
return this.resultSet; // return nothing if input is too short return this.resultSet; // return nothing if input is too short
} }
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();
final long timelimit = startTime + timeout; /* Allocate only a part of the total allowed time to the first processing step, so that some time remains to process results in case of timeout */
final long preSortTimeout = timeout >= 0 ? ((long)(timeout * 0.8)) : timeout;
long totalTimeLimit = timeout >= 0 ? startTime + timeout : Long.MAX_VALUE;
int lastIndexOfSpace = this.word.lastIndexOf(" "); int lastIndexOfSpace = this.word.lastIndexOf(" ");
final Collection<StringBuilder> preSorted; final Collection<StringBuilder> preSorted;
if (askIndex && lastIndexOfSpace > 0) { if (askIndex && lastIndexOfSpace > 0) {
// several words // several words
preSorted = getSuggestions(this.word.substring(0, lastIndexOfSpace), this.word.substring(lastIndexOfSpace + 1), timeout, preSortSelection, this.segment); preSorted = getSuggestions(this.word.substring(0, lastIndexOfSpace), this.word.substring(lastIndexOfSpace + 1), preSortTimeout, preSortSelection, this.segment);
} else { } else {
if (this.endsWithSpace) { if (this.endsWithSpace) {
preSorted = getSuggestions(this.word.toString(), "", timeout, preSortSelection, this.segment); preSorted = getSuggestions(this.word.toString(), "", preSortTimeout, preSortSelection, this.segment);
} else { } else {
preSorted = getSuggestions(timeout, askIndex); preSorted = getSuggestions(preSortTimeout, askIndex);
} }
} }
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER); final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
LinkedHashSet<StringBuilder> countSorted = new LinkedHashSet<StringBuilder>(); final LinkedHashSet<StringBuilder> countSorted = new LinkedHashSet<StringBuilder>();
if (this.more) { if (this.more) {
final int wc = this.segment.getWordCountGuess(this.word.toString()); // all counts must be greater than this final int wc = this.segment.getWordCountGuess(this.word.toString()); // all counts must be greater than this
try { try {
for (final StringBuilder s: preSorted) { for (final StringBuilder s: preSorted) {
if (System.currentTimeMillis() > timelimit) break; if (System.currentTimeMillis() > totalTimeLimit) {
logger.fine("Timeout while processing pre-sorted results.");
break;
}
if (!(scored.sizeSmaller(2 * preSortSelection))) break; if (!(scored.sizeSmaller(2 * preSortSelection))) break;
String s0 = s.toString(); String s0 = s.toString();
int wcg = s0.indexOf(' ') > 0 ? s0.length() * 100 : this.segment.getWordCountGuess(s0); int wcg = s0.indexOf(' ') > 0 ? s0.length() * 100 : this.segment.getWordCountGuess(s0);
@ -202,8 +211,10 @@ public class DidYouMean {
} }
// finished // finished
ConcurrentLog.info("DidYouMean", "found " + preSorted.size() + " unsorted terms, returned " + countSorted.size() + " sorted suggestions; execution time: " if(logger.isInfo()) {
+ (System.currentTimeMillis() - startTime) + "ms"); logger.info("found " + preSorted.size() + " unsorted terms, returned " + countSorted.size() + " sorted suggestions; execution time: "
+ (System.currentTimeMillis() - startTime) + "ms; " + " timeout : " + timeout + "ms.");
}
return countSorted; return countSorted;
} }
@ -212,11 +223,13 @@ public class DidYouMean {
* return a string that is a suggestion list for the list of given words * return a string that is a suggestion list for the list of given words
* @param head - the sequence of words before the last space in the sequence, fixed (not to be corrected); possibly empty * @param head - the sequence of words before the last space in the sequence, fixed (not to be corrected); possibly empty
* @param tail - the word after the last space, possibly empty or misspelled * @param tail - the word after the last space, possibly empty or misspelled
* @param timeout for operation * @param timeout maximum time allowed for operation in milliseconds. A negative value means no limit.
* @param preSortSelection - number of suggestions to be computed * @param preSortSelection - number of suggestions to be computed
* @return * @return
*/ */
private static Collection<StringBuilder> getSuggestions(final String head, final String tail, final long timeout, final int preSortSelection, final Segment segment) { private static Collection<StringBuilder> getSuggestions(final String head, final String tail, final long timeout, final int preSortSelection, final Segment segment) {
final long startTime = System.currentTimeMillis();
long totalTimeLimit = timeout >= 0 ? startTime + timeout : Long.MAX_VALUE;
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER); final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
int count = 30; int count = 30;
final SolrQuery solrQuery = new SolrQuery(); final SolrQuery solrQuery = new SolrQuery();
@ -250,7 +263,13 @@ public class DidYouMean {
//solrQuery.addHighlightField(CollectionSchema.title.getSolrFieldName()); //solrQuery.addHighlightField(CollectionSchema.title.getSolrFieldName());
solrQuery.addHighlightField(CollectionSchema.text_t.getSolrFieldName()); solrQuery.addHighlightField(CollectionSchema.text_t.getSolrFieldName());
solrQuery.setFields(); // no fields wanted! only snippets solrQuery.setFields(); // no fields wanted! only snippets
if(timeout >= 0) {
/* Allocate only a part of the total allowed time to the solr request, so that some time remains to process results in case of timeout */
final long solrAllowedTime = (long)(timeout * 0.8);
solrQuery.setTimeAllowed(solrAllowedTime > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int)solrAllowedTime);
}
OrderedScoreMap<String> snippets = new OrderedScoreMap<String>(null); OrderedScoreMap<String> snippets = new OrderedScoreMap<String>(null);
final long solrResponseTime;
try { try {
QueryResponse response = segment.fulltext().getDefaultConnector().getResponseByParams(solrQuery); QueryResponse response = segment.fulltext().getDefaultConnector().getResponseByParams(solrQuery);
@ -290,7 +309,12 @@ public class DidYouMean {
} }
*/ */
Map<String, Map<String, List<String>>> rawsnippets = response.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets if(System.currentTimeMillis() > totalTimeLimit) {
logger.fine("Solr suggestions timeout. No more time to process raw snippets.");
return result;
}
final Map<String, Map<String, List<String>>> rawsnippets = response.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
if (rawsnippets != null) { if (rawsnippets != null) {
for (Map<String, List<String>> re: rawsnippets.values()) { for (Map<String, List<String>> re: rawsnippets.values()) {
for (List<String> sl: re.values()) { for (List<String> sl: re.values()) {
@ -319,7 +343,18 @@ public class DidYouMean {
e.printStackTrace(); e.printStackTrace();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} finally {
solrResponseTime = System.currentTimeMillis();
} }
if(System.currentTimeMillis() > totalTimeLimit) {
if (logger.isFine()) {
logger.fine(
"Solr suggestions timeout. No more time to filter " + snippets.size() + " sorted snippets.");
}
return result;
}
// delete all snippets which occur double-times, i.e. one that is a substring of another: remove longer snippet // delete all snippets which occur double-times, i.e. one that is a substring of another: remove longer snippet
Iterator<String> si = snippets.keys(false); Iterator<String> si = snippets.keys(false);
while (si.hasNext()) { while (si.hasNext()) {
@ -338,18 +373,23 @@ public class DidYouMean {
while (si.hasNext() && result.size() < preSortSelection) { while (si.hasNext() && result.size() < preSortSelection) {
result.add(new StringBuilder(si.next())); result.add(new StringBuilder(si.next()));
} }
if (logger.isFine()) {
logger.fine(
"Solr suggestions response processed in " + (System.currentTimeMillis() - solrResponseTime) + "ms");
}
return result; return result;
} }
/** /**
* This method triggers the producer and consumer threads of the DidYouMean object. * This method triggers the producer and consumer threads of the DidYouMean object.
* @param word a String with a single word * @param word a String with a single word
* @param timeout execution time in ms. * @param timeout maximum expected execution time in milliseconds. A negative value means no limit.
* @return a Set&lt;String&gt; with word variations contained in term index. * @return a Set&lt;String&gt; with word variations contained in term index.
*/ */
private Collection<StringBuilder> getSuggestions(final long timeout, boolean askIndex) { private Collection<StringBuilder> getSuggestions(final long timeout, boolean askIndex) {
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();
this.timeLimit = startTime + timeout; this.timeLimit = timeout >= 0 ? startTime + timeout : Long.MAX_VALUE;
Thread[] producers = null; Thread[] producers = null;
if (this.more) { if (this.more) {
@ -521,6 +561,3 @@ public class DidYouMean {
} }
} }

Loading…
Cancel
Save