Added filter query options to the ranking servlet /RankingSolr_p.html.

Filter queries are not actually related to ranking, but user requests
have shown that boost queries which merely move results to the end
of the result list are not sufficient. Such boosts are often better
expressed as actual filters, and therefore a filter query can now be
statically applied to every search request. A typical use is the
expression "http_unique_b:true AND www_unique_b:true", which uses the
recently introduced fields http_unique_b and www_unique_b; these are true
for only one of the alternatives with/without http(s) and with/without
the 'www.' prefix in host names.
pull/1/head
orbiter 11 years ago
parent 74c249288a
commit 9d5d86cd03

@ -1000,18 +1000,22 @@ search.ranking.rwi.profile =
# All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name # All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name
search.ranking.solr.collection.boostname.tmpa.0=Default Profile search.ranking.solr.collection.boostname.tmpa.0=Default Profile
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0 search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0
search.ranking.solr.collection.filterquery.tmpa.0=
search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8 crawldepth_i:1^0.4 search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.0= search.ranking.solr.collection.boostfunction.tmpb.0=
search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/data' usage search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/data' usage
search.ranking.solr.collection.boostfields.tmpa.1=text_t^1.0 search.ranking.solr.collection.boostfields.tmpa.1=text_t^1.0
search.ranking.solr.collection.filterquery.tmpa.1=
search.ranking.solr.collection.boostquery.tmpa.1=crawldepth_i:0^0.8 crawldepth_i:1^0.4 search.ranking.solr.collection.boostquery.tmpa.1=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1) search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1)
search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a singe domain only, i.e. if a site:-operator is used search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a singe domain only, i.e. if a site:-operator is used
search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0 search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0
search.ranking.solr.collection.filterquery.tmpa.2=
search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0 search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0
search.ranking.solr.collection.boostfunction.tmpb.2= search.ranking.solr.collection.boostfunction.tmpb.2=
search.ranking.solr.collection.boostname.tmpa.3=_unused3 search.ranking.solr.collection.boostname.tmpa.3=_unused3
search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0 search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
search.ranking.solr.collection.filterquery.tmpa.3=
search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8 crawldepth_i:1^0.4 search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.3= search.ranking.solr.collection.boostfunction.tmpb.3=

@ -27,7 +27,7 @@
To find out which kind of operations are possible, see the <a href="http://wiki.apache.org/solr/FunctionQuery" target="_blank">Solr Function Query</a> documentation. To find out which kind of operations are possible, see the <a href="http://wiki.apache.org/solr/FunctionQuery" target="_blank">Solr Function Query</a> documentation.
Example: to order by date, use "recip(ms(NOW,last_modified),3.16e-11,1,1)", to order by crawldepth, use "div(100,add(crawldepth_i,1))". Example: to order by date, use "recip(ms(NOW,last_modified),3.16e-11,1,1)", to order by crawldepth, use "div(100,add(crawldepth_i,1))".
<dl> <dl>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bf" id="bf_label">boost</label></dt> <dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bf" id="bf_label">boost=</label></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="bf_dd"> <dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="bf_dd">
<input name="bf" id="bf" type="text" align="left" size="100" value="#[bf]#" /> <input name="bf" id="bf" type="text" align="left" size="100" value="#[bf]#" />
</dd> </dd>
@ -47,7 +47,7 @@
Example: "fuzzy_signature_unique_b:true^100000.0f" means that documents, identified as 'double' are ranked very bad and appended to the end of all results (because the unique are ranked high). Example: "fuzzy_signature_unique_b:true^100000.0f" means that documents, identified as 'double' are ranked very bad and appended to the end of all results (because the unique are ranked high).
To find appropriate fields for this query, see the <a href="IndexSchema_p.html">YaCy Solr Schema</a> and look for boolean values (with suffix '_b') or tags inside string fields (with suffix '_s' or '_sxt'). To find appropriate fields for this query, see the <a href="IndexSchema_p.html">YaCy Solr Schema</a> and look for boolean values (with suffix '_b') or tags inside string fields (with suffix '_s' or '_sxt').
<dl> <dl>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bq" id="bq_label">bq</label></dt> <dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bq" id="bq_label">bq=</label></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="bq_dd"> <dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="bq_dd">
<input name="bq" id="bq" type="text" align="left" size="100" value="#[bq]#" /> <input name="bq" id="bq" type="text" align="left" size="100" value="#[bq]#" />
</dd> </dd>
@ -59,6 +59,26 @@
</dl> </dl>
</fieldset> </fieldset>
</form> </form>
<form class="dsearch" action="RankingSolr_p.html" method="post" enctype="multipart/form-data">
<fieldset>
<input type="hidden" name="profileNr" value="#[profileNr]#" />
<legend>Filter Query</legend>
The Filter Query is attached to every query. Use this to statically add a selection criteria to reduce the set of results.
Example: "http_unique_b:true AND www_unique_b:true" will filter out all results where urls appear also with/without http(s) and/or with/without 'www.' prefix.
To find appropriate fields for this query, see the <a href="IndexSchema_p.html">YaCy Solr Schema</a>. Warning: bad expressions here will cause that you don't have any search result!
<dl>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="fq" id="fq_label">fq=</label></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="fq_dd">
<input name="fq" id="fq" type="text" align="left" size="100" value="#[fq]#" />
</dd>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;">
<input type="submit" name="EnterFQ" value="Set Filter Query" />
<input type="submit" name="ResetFQ" value="Re-Set to default" />
</dd>
</dl>
</fieldset>
</form>
<form class="dsearch" action="RankingSolr_p.html" method="post" enctype="multipart/form-data"> <form class="dsearch" action="RankingSolr_p.html" method="post" enctype="multipart/form-data">
<fieldset> <fieldset>
<input type="hidden" name="profileNr" value="#[profileNr]#" /> <input type="hidden" name="profileNr" value="#[profileNr]#" />

@ -88,6 +88,21 @@ public class RankingSolr_p {
} }
} }
// "Set Filter Query": store the submitted fq expression for the selected ranking profile,
// both in the persistent configuration and in the ranking object that is currently in use.
if (post != null && post.containsKey("EnterFQ")) {
    String fq = post.get("fq");
    if (fq != null) {
        sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + profileNr, fq);
        sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setFilterQuery(fq);
    }
}
// "Re-Set to default": the default is the empty filter query.
// The value is a literal, so no null check is needed before applying it.
if (post != null && post.containsKey("ResetFQ")) {
    String fq = ""; // an example of a useful non-empty value: "http_unique_b:true AND www_unique_b:true"
    sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + profileNr, fq);
    sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setFilterQuery(fq);
}
if (post != null && post.containsKey("EnterBF")) { if (post != null && post.containsKey("EnterBF")) {
String bf = post.get("bf"); String bf = post.get("bf");
if (bf != null) { if (bf != null) {
@ -124,6 +139,7 @@ public class RankingSolr_p {
i++; i++;
} }
prop.put("boosts", i); prop.put("boosts", i);
prop.put("fq", ranking.getFilterQuery());
prop.put("bq", ranking.getBoostQuery()); prop.put("bq", ranking.getBoostQuery());
prop.put("bf", ranking.getBoostFunction()); prop.put("bf", ranking.getBoostFunction());

@ -38,17 +38,17 @@ public class Ranking {
private static int minTokenLen = 3; // to be filled with search.ranking.solr.doubledetection.minlength private static int minTokenLen = 3; // to be filled with search.ranking.solr.doubledetection.minlength
private Map<SchemaDeclaration, Float> fieldBoosts; private Map<SchemaDeclaration, Float> fieldBoosts;
private String name, boostQuery, boostFunction; private String name, filterQuery, boostQuery, boostFunction;
public Ranking() { public Ranking() {
super(); super();
this.name = ""; this.name = "";
this.fieldBoosts = new LinkedHashMap<SchemaDeclaration, Float>(); this.fieldBoosts = new LinkedHashMap<SchemaDeclaration, Float>();
this.filterQuery = "";
this.boostQuery = ""; this.boostQuery = "";
this.boostFunction = ""; this.boostFunction = "";
} }
public String getName() { public String getName() {
return name; return name;
} }
@ -95,12 +95,32 @@ public class Ranking {
} }
} }
/**
 * Set a filter query which will be added as fq-attribute to the query.
 * A null argument is stored as the empty string, matching the default assigned
 * in the constructor, because users of getFilterQuery() test the result with
 * length() and do not check for null.
 * @param filterQuery the filter query expression; null or the empty string means that no filter is set
 */
public void setFilterQuery(String filterQuery) {
    this.filterQuery = filterQuery == null ? "" : filterQuery;
}
/**
 * Get the filter query of this ranking profile; the string is meant to be
 * passed as the fq-attribute of a query.
 * @return the filter query string currently held by this object
 */
public String getFilterQuery() {
    return filterQuery;
}
/**
* set a boost query which will be added as bq-attribute to the query
* @param boostQuery
*/
public void setBoostQuery(String boostQuery) { public void setBoostQuery(String boostQuery) {
this.boostQuery = boostQuery; this.boostQuery = boostQuery;
} }
/** /**
* produce a string that can be added as a 'boost query' at the bq-attribute * get a string that can be added as a 'boost query' at the bq-attribute
* @return * @return
*/ */
public String getBoostQuery() { public String getBoostQuery() {

@ -148,8 +148,10 @@ public class GSAsearchServlet extends HttpServlet {
} else { } else {
// if no such sort attribute is given, use the ranking as configured for YaCy // if no such sort attribute is given, use the ranking as configured for YaCy
Ranking ranking = sb.index.fulltext().getDefaultConfiguration().getRanking(0); Ranking ranking = sb.index.fulltext().getDefaultConfiguration().getRanking(0);
String fq = ranking.getFilterQuery();
String bq = ranking.getBoostQuery(); String bq = ranking.getBoostQuery();
String bf = ranking.getBoostFunction(); String bf = ranking.getBoostFunction();
// pass the filter query of the ranking profile as fq parameter (the boost query must not be used here)
if (fq.length() > 0) post.put(CommonParams.FQ, fq);
if (bq.length() > 0) post.put("bq", bq); if (bq.length() > 0) post.put("bq", bq);
if (bf.length() > 0) post.put("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) post.put("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
} }

@ -157,8 +157,10 @@ public class SolrSelectServlet extends HttpServlet {
if (!mmsp.getMap().containsKey("sort") && !mmsp.getMap().containsKey("bq") && !mmsp.getMap().containsKey("bf") && !mmsp.getMap().containsKey("boost")) { if (!mmsp.getMap().containsKey("sort") && !mmsp.getMap().containsKey("bq") && !mmsp.getMap().containsKey("bf") && !mmsp.getMap().containsKey("boost")) {
if (!mmsp.getMap().containsKey("defType")) mmsp.getMap().put("defType", new String[]{"edismax"}); if (!mmsp.getMap().containsKey("defType")) mmsp.getMap().put("defType", new String[]{"edismax"});
Ranking ranking = sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr); Ranking ranking = sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr);
String fq = ranking.getFilterQuery();
String bq = ranking.getBoostQuery(); String bq = ranking.getBoostQuery();
String bf = ranking.getBoostFunction(); String bf = ranking.getBoostFunction();
if (fq.length() > 0) mmsp.getMap().put("fq", new String[]{fq});
if (bq.length() > 0) mmsp.getMap().put("bq", new String[]{bq}); if (bq.length() > 0) mmsp.getMap().put("bq", new String[]{bq});
if (bf.length() > 0) mmsp.getMap().put("boost", new String[]{bf}); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) mmsp.getMap().put("boost", new String[]{bf}); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
} }

@ -487,6 +487,7 @@ public final class Switchboard extends serverSwitch {
Ranking r = solrCollectionConfigurationWork.getRanking(i); Ranking r = solrCollectionConfigurationWork.getRanking(i);
String name = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ + i, "_dummy" + i); String name = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ + i, "_dummy" + i);
String boosts = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + i, "text_t^1.0"); String boosts = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + i, "text_t^1.0");
String fq = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + i, "");
String bq = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + i, ""); String bq = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + i, "");
String bf = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + i, ""); String bf = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + i, "");
// apply some hard-coded patches for earlier experiments we do not want any more // apply some hard-coded patches for earlier experiments we do not want any more
@ -496,6 +497,7 @@ public final class Switchboard extends serverSwitch {
if (boosts.equals("url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0")) boosts = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^2.0"; if (boosts.equals("url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0")) boosts = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^2.0";
r.setName(name); r.setName(name);
r.updateBoosts(boosts); r.updateBoosts(boosts);
r.setFilterQuery(fq);
r.setBoostQuery(bq); r.setBoostQuery(bq);
r.setBoostFunction(bf); r.setBoostFunction(bf);
} }

@ -523,6 +523,7 @@ public final class SwitchboardConstants {
*/ */
public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ = "search.ranking.solr.collection.boostname.tmpa."; // temporary until we know best default values; add the index number (0..3) to that string public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ = "search.ranking.solr.collection.boostname.tmpa."; // temporary until we know best default values; add the index number (0..3) to that string
public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ = "search.ranking.solr.collection.boostfields.tmpa."; public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ = "search.ranking.solr.collection.boostfields.tmpa.";
public static final String SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ = "search.ranking.solr.collection.filterquery.tmpa.";
public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ = "search.ranking.solr.collection.boostquery.tmpa."; public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ = "search.ranking.solr.collection.boostquery.tmpa.";
public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ = "search.ranking.solr.collection.boostfunction.tmpb."; public static final String SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ = "search.ranking.solr.collection.boostfunction.tmpb.";

@ -348,6 +348,7 @@ public final class QueryParams {
params.setQuery(this.queryGoal.collectionTextQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile, excludeintext_image).toString()); params.setQuery(this.queryGoal.collectionTextQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile, excludeintext_image).toString());
Ranking ranking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile Ranking ranking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile
String fq = ranking.getFilterQuery();
String bq = ranking.getBoostQuery(); String bq = ranking.getBoostQuery();
String bf = ranking.getBoostFunction(); String bf = ranking.getBoostFunction();
if (this.queryGoal.getIncludeSize() > 1) { if (this.queryGoal.getIncludeSize() > 1) {
@ -355,6 +356,10 @@ public final class QueryParams {
if (bq.length() > 0) bq += " "; if (bq.length() > 0) bq += " ";
bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10"; bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10";
} }
if (fq.length() > 0) {
String oldfq = params.get("fq");
params.setParam("fq", oldfq == null || oldfq.length() == 0 ? fq : "(" + oldfq + ") AND (" + fq + ")");
}
if (bq.length() > 0) params.setParam("bq", bq); if (bq.length() > 0) params.setParam("bq", bq);
if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29

Loading…
Cancel
Save