enhanced suggestions

10 years ago · 1df6492019
parent c7fdde3bd1
commit 1df6492019
2 changed files with 80 additions and 8 deletions
--- a/htroot/suggest.java
+++ b/htroot/suggest.java
@ -64,7 +64,7 @@ public class suggest {
        final String originalquerystring = (post == null) ? "" : post.get("query", post.get("q", ""));
        final String querystring =  originalquerystring.replace('+', ' ').replaceAll("%20", " ");
        final int timeout = (post == null) ? 300 : post.getInt("timeout", 300);
-        final int count = (post == null) ? 10 : Math.min(20, post.getInt("count", 10));
+        final int count = (post == null) ? 10 : Math.min(30, post.getInt("count", 20));

        int c = 0;
        final DidYouMean didYouMean = new DidYouMean(sb.index, querystring);
--- a/source/net/yacy/data/DidYouMean.java
+++ b/source/net/yacy/data/DidYouMean.java
@ -83,6 +83,7 @@ public class DidYouMean {

    private final Segment segment;
    private final StringBuilder word;
+    private final boolean endsWithSpace;
    private final int wordLen;
    private long timeLimit;
    private final SortedSet<StringBuilder> resultSet;
@ -94,6 +95,7 @@ public class DidYouMean {
     * @param sort true/false -  sorts the resulting TreeSet by index.count(); <b>Warning:</b> this causes heavy i/o.
     */
    public DidYouMean(final Segment segment, final String word0) {
+        this.endsWithSpace = word0.length() > 0 && word0.charAt(word0.length() - 1) == ' ';
        this.word = new StringBuilder(word0.trim());
        this.resultSet = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, WORD_LENGTH_COMPARATOR)));
        this.wordLen = this.word.length();
@ -164,7 +166,11 @@ public class DidYouMean {
            // several words
            preSorted = getSuggestions(this.word.substring(0, lastIndexOfSpace), this.word.substring(lastIndexOfSpace + 1), timeout, preSortSelection, this.segment);
        } else {
-            preSorted = getSuggestions(timeout);
+            if (this.endsWithSpace) {
+                preSorted = getSuggestions(this.word.toString(), "", timeout, preSortSelection, this.segment);
+            } else {
+                preSorted = getSuggestions(timeout);
+            }
        }
        final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
        LinkedHashSet<StringBuilder> countSorted = new LinkedHashSet<StringBuilder>();
@ -216,24 +222,74 @@ public class DidYouMean {
        final SolrQuery solrQuery = new SolrQuery();
        solrQuery.setParam("defType", "edismax");
        solrQuery.setFacet(false);
-        assert tail.length() > 0 && tail.indexOf(' ') < 0; // if there would be a space it should be part of head
-        String q = head.length() == 0 ? CollectionSchema.text_t.getSolrFieldName() + ":" + tail + "~" : CollectionSchema.title.getSolrFieldName() + ":\"" + head + "\"^10 " + CollectionSchema.text_t.getSolrFieldName() + ":(" + head + " " + tail + ")~"; // for a fuzzy search we cannot apply fuzzyness on the tail only
-        String fq = head.length() == 0 ? null : CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + "\""; // in all cases (a) and (b) there must be that portion, but that is not part of the snippet that we are searching
+        String q = "", fq = "";
+        if (head.length() == 0 && tail.length() > 0) {
+            // head == "", tail != "" -> only one word was entered, no space at end
+            q = CollectionSchema.title.getSolrFieldName() + ":\"" + tail + "\"^1000.0 " + CollectionSchema.text_t.getSolrFieldName() + ":" + tail + "~";
+            fq = null;
+        }
+        if (head.length() > 0 && tail.length() == 0) {
+            // head != "", tail == "" -> only one word was entered and the input ends with a space
+            q = CollectionSchema.title.getSolrFieldName() + ":\"" + head + " \"^1000.0 " + CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + " \"";
+            fq = CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + " \"";
+        }
+        if (head.length() > 0 && tail.length() > 0) {
+            // head != "", tail != "" -> several words were entered, last one is in tail, everything before in head.
+            q = CollectionSchema.text_t.getSolrFieldName() + ":(" + head + " " + tail + ")~"; // for a fuzzy search we cannot apply fuzzyness on the tail only
+            fq = CollectionSchema.text_t.getSolrFieldName() + ":\"" + head + "\"";
+        }
        solrQuery.setQuery(q);
        if (head.length() > 0 && fq != null) solrQuery.setFilterQueries(fq);
        solrQuery.setStart(0);
        solrQuery.setRows(count);
        solrQuery.setHighlight(true);
-        solrQuery.setHighlightFragsize(head.length() + tail.length() + 80);
+        //solrQuery.setHighlightFragsize(head.length() + tail.length() + 180);
        solrQuery.setHighlightSimplePre("<b>");
        solrQuery.setHighlightSimplePost("</b>");
-        solrQuery.setHighlightSnippets(1);
-        solrQuery.addHighlightField(CollectionSchema.title.getSolrFieldName());
+        solrQuery.setHighlightSnippets(5);
+        //solrQuery.addHighlightField(CollectionSchema.title.getSolrFieldName());
        solrQuery.addHighlightField(CollectionSchema.text_t.getSolrFieldName());
        solrQuery.setFields(); // no fields wanted! only snippets
        OrderedScoreMap<String> snippets = new OrderedScoreMap<String>(null);
        try {
            QueryResponse response = segment.fulltext().getDefaultConnector().getResponseByParams(solrQuery);
+            
+            /*
+            SolrQuery query = new SolrQuery();
+            query.setRequestHandler("/suggest");
+            //query.setQueryType(suggestHandler);
+            query.setQuery((head + " " + tail).trim());
+            Map<String,String> params = new HashMap<String,String>();
+            params.put(CommonParams.ROWS,Integer.toString(count));
+            params.put(SpellingParams.SPELLCHECK_PREFIX + "field",dictionary);
+            params.put(SpellingParams.SPELLCHECK_PREFIX + "dictionary",dictionary);
+            params.put(SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR,Boolean.toString(onlyMorePopular));
+            params.put(SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES,Integer.toString(1));
+            params.put(SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS,Boolean.toString(collate));
+            params.put(SpellingParams.SPELLCHECK_COLLATE,Boolean.toString(collate));
+            query.add(new MapSolrParams(params));
+            response = segment.fulltext().getDefaultConnector().getResponseByParams(query);
+            
+            SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse();
+            if (spellCheckResponse != null) {
+                Map<String,Suggestion> suggestionMapInternal = spellCheckResponse.getSuggestionMap();
+                if (suggestionMapInternal != null) {
+                    Map<String, Suggestion> suggestionMap = spellCheckResponse.getSuggestionMap();
+                }
+                if (spellCheckResponse.getCollatedResult() != null) {
+                    String collatedResult = spellCheckResponse.getCollatedResult().trim();
+                }
+                List<Suggestion> suggestions=spellCheckResponse.getSuggestions();
+                if (suggestions.size() != 0) {
+                    StringBuffer sb=new StringBuffer();
+                    for (Suggestion suggestion : suggestions) {
+                        sb.append(suggestion.getSuggestions().get(0)).append(" ");
+                    }
+                    String spellCheckProposal = sb.toString().trim();
+                }
+            }
+            */
+            
            Map<String, Map<String, List<String>>> rawsnippets = response.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
            if (rawsnippets != null) {
                for (Map<String, List<String>> re: rawsnippets.values()) {
@ -260,9 +316,25 @@ public class DidYouMean {
                }
            }
        } catch (SolrException e) {
+            e.printStackTrace();
        } catch (IOException e) {
+            e.printStackTrace();
        }
+        // remove redundant snippets: if one snippet is a case-insensitive prefix of another, longer snippet, delete the longer one
        Iterator<String> si = snippets.keys(false);
+        while (si.hasNext()) {
+            String testsnippet = si.next().toLowerCase();
+            if (testsnippet.length() > head.length() + tail.length() + 1) {
+                Iterator<String> sin = snippets.keys(false);
+                while (sin.hasNext()) {
+                    String snippetx = sin.next();
+                    if (snippetx.length() != testsnippet.length() && snippetx.toLowerCase().startsWith(testsnippet)) {
+                        snippets.delete(snippetx);
+                    }
+                }
+            }
+        }
+        si = snippets.keys(false);
        while (si.hasNext() && result.size() < preSortSelection) {
            result.add(new StringBuilder(si.next()));
        }