split the query into a filter query and a text query to get better-ranked results and faster results
pull/1/head
Michael Peter Christen 10 years ago
parent 36e9cdb376
commit f5a032f293

@ -25,7 +25,9 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
@ -130,8 +132,10 @@ public class GSAsearchServlet extends HttpServlet {
// get a solr query string
QueryGoal qg = new QueryGoal(originalQuery);
StringBuilder solrQ = qg.collectionTextQueryString(false);
List<String> solrFQ = qg.collectionTextFilterQuery(false);
StringBuilder solrQ = qg.collectionTextQuery();
post.put("defType", "edismax");
for (String fq: solrFQ) post.add(CommonParams.FQ, fq);
post.put(CommonParams.Q, solrQ.toString());
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 100000000 : 100));

@ -141,7 +141,7 @@ public class SolrSelectServlet extends HttpServlet {
querystring = modifier.parse(querystring);
modifier.apply(mmsp);
QueryGoal qg = new QueryGoal(querystring);
StringBuilder solrQ = qg.collectionTextQueryString(false);
StringBuilder solrQ = qg.collectionTextQuery();
mmsp.getMap().put(CommonParams.Q, new String[]{solrQ.toString()}); // sru patch
}
String q = mmsp.get(CommonParams.Q, "");

@ -26,6 +26,7 @@ import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.SortedSet;
@ -318,41 +319,46 @@ public class QueryGoal {
for (final byte[] b: blues) this.include_hashes.remove(b);
}
public StringBuilder collectionTextQueryString(boolean noimages) {
final StringBuilder q = new StringBuilder(80);
/**
 * Build the list of filter query strings used for a text search on the collection index.
 * The list always contains a restriction of the httpstatus_i field to 200; when
 * noimages is set, a second entry excludes the file extensions jpg, png and gif.
 * NOTE(review): the q.append(...) lines below look like the pre-change lines of this
 * diff view (they reference a StringBuilder q that is not declared here) - confirm.
 * @param noimages if true, add a filter that excludes jpg, png and gif file extensions
 * @return a newly created list of filter query strings
 */
public List<String> collectionTextFilterQuery(boolean noimages) {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200");
if (noimages) q.append(" AND -").append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif)");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
// each entry is added as its own list element, so no " AND " joiner is needed here
if (noimages) fqs.add("-" + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif)");
return fqs;
}
/**
 * Build the main query string for a text search.
 * For a catch-all goal the literal query "*:*" is returned; otherwise the result of
 * getGoalQuery() is returned unchanged.
 * NOTE(review): the lines using the StringBuilder q look like the pre-change lines of
 * this diff view (q is not declared in this method) - confirm.
 * @return the query as a StringBuilder
 */
public StringBuilder collectionTextQuery() {
// parse special requests
if (isCatchall()) return q;
if (isCatchall()) return new StringBuilder("*:*");
// add goal query
StringBuilder w = getGoalQuery();
if (w.length() > 0) {
q.append(" AND (");
q.append(w);
q.append(')');
}
return q;
return getGoalQuery();
}
public StringBuilder collectionImageQueryString(final QueryModifier modifier) {
final StringBuilder q = new StringBuilder(80);
/**
 * Build the list of filter query strings used for an image search on the collection index.
 * The first entry restricts the httpstatus_i field to 200. The second entry is a
 * disjunction that accepts documents which have an images_urlstub_sxt value, a
 * jpg/png/gif file extension, or an image/* content type.
 * NOTE(review): the q.append(...) lines below look like the pre-change lines of this
 * diff view (they belong to the former single-string query) - confirm.
 * @return a newly created list of filter query strings
 */
public List<String> collectionImageFilterQuery() {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(AbstractSolrConnector.CATCHALL_DTERM + " OR ");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR ");
q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
// The disjunction is a filter query of its own, so it is no longer wrapped in the
// " AND ( ... )" group of the former single-string query; the expression must therefore
// end with exactly one ')' (closing "(image/*"), otherwise the parentheses are unbalanced.
fqs.add(
CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM + " OR " +
CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif) OR " +
CollectionSchema.content_type.getSolrFieldName() + ":(image/*)");
return fqs;
}
public StringBuilder collectionImageQuery(final QueryModifier modifier) {
final StringBuilder q = new StringBuilder(80);
// parse special requests
if (isCatchall()) return q;
if (isCatchall()) return new StringBuilder("*:*");
// add goal query
StringBuilder w = getGoalQuery();
q.append(w);
// combine these queries for all relevant fields
if (w.length() > 0) {

@ -373,9 +373,9 @@ public final class QueryParams {
}
// construct query
final SolrQuery params = getBasicParams(getFacets);
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionTextFilterQuery(excludeintext_image));
int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0;
params.setQuery(this.queryGoal.collectionTextQueryString(excludeintext_image).toString());
params.setQuery(this.queryGoal.collectionTextQuery().toString());
Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile
String fq = actRanking.getFilterQuery();
@ -409,8 +409,8 @@ public final class QueryParams {
}
// construct query
final SolrQuery params = getBasicParams(getFacets);
params.setQuery(this.queryGoal.collectionImageQueryString(this.modifier).toString());
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionImageFilterQuery());
params.setQuery(this.queryGoal.collectionImageQuery(this.modifier).toString());
// set boosts
StringBuilder bq = new StringBuilder();
@ -426,7 +426,7 @@ public final class QueryParams {
return params;
}
private SolrQuery getBasicParams(boolean getFacets) {
private SolrQuery getBasicParams(boolean getFacets, List<String> fqs) {
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0");
@ -441,7 +441,7 @@ public final class QueryParams {
}
// add site facets
final List<String> fqs = getFacetsFilterQueries();
fqs.addAll(getFacetsFilterQueries());
if (fqs.size() > 0) {
params.setFilterQueries(fqs.toArray(new String[fqs.size()]));
}

Loading…
Cancel
Save