From f5a032f29351e1f61a4fa01adbf3fbd8cd878acd Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 7 Apr 2015 16:10:13 +0200 Subject: [PATCH] split query into filter query and text query to get better ranking results and faster results --- .../yacy/http/servlets/GSAsearchServlet.java | 6 ++- .../yacy/http/servlets/SolrSelectServlet.java | 2 +- source/net/yacy/search/query/QueryGoal.java | 46 +++++++++++-------- source/net/yacy/search/query/QueryParams.java | 12 ++--- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/source/net/yacy/http/servlets/GSAsearchServlet.java b/source/net/yacy/http/servlets/GSAsearchServlet.java index 0910803fe..c531e7603 100644 --- a/source/net/yacy/http/servlets/GSAsearchServlet.java +++ b/source/net/yacy/http/servlets/GSAsearchServlet.java @@ -25,7 +25,9 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Map; + import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; @@ -130,8 +132,10 @@ public class GSAsearchServlet extends HttpServlet { // get a solr query string QueryGoal qg = new QueryGoal(originalQuery); - StringBuilder solrQ = qg.collectionTextQueryString(false); + List<String> solrFQ = qg.collectionTextFilterQuery(false); + StringBuilder solrQ = qg.collectionTextQuery(); post.put("defType", "edismax"); + for (String fq: solrFQ) post.add(CommonParams.FQ, fq); post.put(CommonParams.Q, solrQ.toString()); post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 
100000000 : 100)); diff --git a/source/net/yacy/http/servlets/SolrSelectServlet.java b/source/net/yacy/http/servlets/SolrSelectServlet.java index 2f129f26e..36fc7aa80 100644 --- a/source/net/yacy/http/servlets/SolrSelectServlet.java +++ b/source/net/yacy/http/servlets/SolrSelectServlet.java @@ -141,7 +141,7 @@ public class SolrSelectServlet extends HttpServlet { querystring = modifier.parse(querystring); modifier.apply(mmsp); QueryGoal qg = new QueryGoal(querystring); - StringBuilder solrQ = qg.collectionTextQueryString(false); + StringBuilder solrQ = qg.collectionTextQuery(); mmsp.getMap().put(CommonParams.Q, new String[]{solrQ.toString()}); // sru patch } String q = mmsp.get(CommonParams.Q, ""); diff --git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index 2c286d8a3..9ea6a904d 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -26,6 +26,7 @@ import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; +import java.util.List; import java.util.Locale; import java.util.Set; import java.util.SortedSet; @@ -318,41 +319,46 @@ public class QueryGoal { for (final byte[] b: blues) this.include_hashes.remove(b); } - public StringBuilder collectionTextQueryString(boolean noimages) { - final StringBuilder q = new StringBuilder(80); + public List<String> collectionTextFilterQuery(boolean noimages) { + final ArrayList<String> fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls - q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200"); - if (noimages) q.append(" AND -").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif)"); + fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200"); + if (noimages) fqs.add("-" + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif)"); + return fqs; + } + + public StringBuilder 
collectionTextQuery() { + // parse special requests - if (isCatchall()) return q; + if (isCatchall()) return new StringBuilder("*:*"); // add goal query - StringBuilder w = getGoalQuery(); - - if (w.length() > 0) { - q.append(" AND ("); - q.append(w); - q.append(')'); - } - return q; + return getGoalQuery(); } - public StringBuilder collectionImageQueryString(final QueryModifier modifier) { - final StringBuilder q = new StringBuilder(80); + public List<String> collectionImageFilterQuery() { + final ArrayList<String> fqs = new ArrayList<>(); // add filter to prevent that results come from failed urls - q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND ("); - q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM + " OR "); - q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR "); - q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))"); + fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200"); + fqs.add( + CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM + " OR " + + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif) OR " + + CollectionSchema.content_type.getSolrFieldName() + ":(image/*)"); + return fqs; + } + + public StringBuilder collectionImageQuery(final QueryModifier modifier) { + final StringBuilder q = new StringBuilder(80); // parse special requests - if (isCatchall()) return q; + if (isCatchall()) return new StringBuilder("*:*"); // add goal query StringBuilder w = getGoalQuery(); + q.append(w); // combine these queries for all relevant fields if (w.length() > 0) { diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 5f2a49819..6bba26e4a 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -373,9 +373,9 @@ public 
final class QueryParams { } // construct query - final SolrQuery params = getBasicParams(getFacets); + final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionTextFilterQuery(excludeintext_image)); int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0; - params.setQuery(this.queryGoal.collectionTextQueryString(excludeintext_image).toString()); + params.setQuery(this.queryGoal.collectionTextQuery().toString()); Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile String fq = actRanking.getFilterQuery(); @@ -409,8 +409,8 @@ public final class QueryParams { } // construct query - final SolrQuery params = getBasicParams(getFacets); - params.setQuery(this.queryGoal.collectionImageQueryString(this.modifier).toString()); + final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery()); + params.setQuery(this.queryGoal.collectionImageQuery(this.modifier).toString()); // set boosts StringBuilder bq = new StringBuilder(); @@ -426,7 +426,7 @@ public final class QueryParams { return params; } - private SolrQuery getBasicParams(boolean getFacets) { + private SolrQuery getBasicParams(boolean getFacets, List<String> fqs) { final SolrQuery params = new SolrQuery(); params.setParam("defType", "edismax"); params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0"); @@ -441,7 +441,7 @@ public final class QueryParams { } // add site facets - final List<String> fqs = getFacetsFilterQueries(); + fqs.addAll(getFacetsFilterQueries()); if (fqs.size() > 0) { params.setFilterQueries(fqs.toArray(new String[fqs.size()])); }