introduce getQueryFields to return default query fields (query parameter QF)

calculated from the boost fields config, making sure title, description, keywords and content are always searched.
- apply the change to the Solr servlet (SolrSelectServlet), making sure every remote query uses at least all locally defined boost fields for search
- apply to local solr search
- simplify select query by using QF defaults
pull/1/head
reger 10 years ago
parent 53e4ae65d0
commit 9b0de2de64

@ -38,7 +38,7 @@ public class Ranking {
private static int minTokenLen = 3; // to be filled with search.ranking.solr.doubledetection.minlength private static int minTokenLen = 3; // to be filled with search.ranking.solr.doubledetection.minlength
private Map<SchemaDeclaration, Float> fieldBoosts; private Map<SchemaDeclaration, Float> fieldBoosts;
private String name, filterQuery, boostQuery, boostFunction; private String name, filterQuery, boostQuery, boostFunction, queryFields;
public Ranking() { public Ranking() {
super(); super();
@ -47,6 +47,7 @@ public class Ranking {
this.filterQuery = ""; this.filterQuery = "";
this.boostQuery = ""; this.boostQuery = "";
this.boostFunction = ""; this.boostFunction = "";
this.queryFields = null;
} }
public String getName() { public String getName() {
@ -60,7 +61,7 @@ public class Ranking {
public void putFieldBoost(SchemaDeclaration schema, float boost) { public void putFieldBoost(SchemaDeclaration schema, float boost) {
this.fieldBoosts.put(schema, boost); this.fieldBoosts.put(schema, boost);
} }
public Float getFieldBoost(SchemaDeclaration schema) { public Float getFieldBoost(SchemaDeclaration schema) {
return this.fieldBoosts.get(schema); return this.fieldBoosts.get(schema);
} }
@ -68,7 +69,46 @@ public class Ranking {
public Set<Map.Entry<SchemaDeclaration,Float>> getBoostMap() { public Set<Map.Entry<SchemaDeclaration,Float>> getBoostMap() {
return this.fieldBoosts.entrySet(); return this.fieldBoosts.entrySet();
} }
/**
 * Assembles the default query-field list that is handed to Solr as the QF parameter.
 * <p>
 * Every entry of the boost map contributes its Solr field name, followed by
 * {@code ^boost} when a boost value is stored for it. Fields of a numeric or
 * boolean Solr type are left out. The fields title, text_t, description_txt and
 * keywords are appended without a boost whenever the boost map has no entry for
 * them, so these text fields are always part of the query fields.
 * <p>
 * The assembled string is kept in {@code queryFields} and returned as-is on later
 * calls for as long as that field is non-null.
 *
 * @return blank-separated list of field names with optional boost factor, e.g.
 *         "field1^5.0 field2 field3^2.0"; the string ends with a blank when
 *         keywords already has an entry in the boost map
 */
public String getQueryFields() {
    if (this.queryFields == null) {
        final StringBuilder buf = new StringBuilder(80);

        // configured boost fields first, in the iteration order of the boost map
        for (final Map.Entry<SchemaDeclaration, Float> boostEntry : this.fieldBoosts.entrySet()) {
            final SchemaDeclaration schemaField = boostEntry.getKey();
            // numeric and boolean fields are not useful as default query fields
            final boolean searchable = schemaField.getType() != SolrType.num_integer
                    && schemaField.getType() != SolrType.num_long
                    && schemaField.getType() != SolrType.num_float
                    && schemaField.getType() != SolrType.num_double
                    && schemaField.getType() != SolrType.bool;
            if (searchable) {
                buf.append(schemaField.getSolrFieldName());
                final Float weight = boostEntry.getValue();
                if (weight != null) {
                    buf.append('^').append(weight.toString());
                }
                buf.append(' ');
            }
        }

        // core text fields that must always be searched, added unboosted when not configured
        final CollectionSchema[] spaceTerminated = {
            CollectionSchema.title,
            CollectionSchema.text_t,
            CollectionSchema.description_txt
        };
        for (final CollectionSchema coreField : spaceTerminated) {
            if (!this.fieldBoosts.containsKey(coreField)) {
                buf.append(coreField.getSolrFieldName()).append(' ');
            }
        }
        // last candidate: no separator is written after it
        if (!this.fieldBoosts.containsKey(CollectionSchema.keywords)) {
            buf.append(CollectionSchema.keywords.getSolrFieldName());
        }

        this.queryFields = buf.toString(); // remember the result for subsequent calls
    }
    return this.queryFields;
}
/** /**
* the updateDef is a definition string that comes from a configuration file. * the updateDef is a definition string that comes from a configuration file.
* It should be a comma-separated list of field^boost values * It should be a comma-separated list of field^boost values
@ -79,6 +119,7 @@ public class Ranking {
// call i.e. with "sku^20.0,url_paths_sxt^20.0,title^15.0,h1_txt^11.0,h2_txt^10.0,author^8.0,description_txt^5.0,keywords^2.0,text_t^1.0,fuzzy_signature_unique_b^100000.0" // call i.e. with "sku^20.0,url_paths_sxt^20.0,title^15.0,h1_txt^11.0,h2_txt^10.0,author^8.0,description_txt^5.0,keywords^2.0,text_t^1.0,fuzzy_signature_unique_b^100000.0"
if (boostDef == null || boostDef.length() == 0) return; if (boostDef == null || boostDef.length() == 0) return;
String[] bf = CommonPattern.COMMA.split(boostDef); String[] bf = CommonPattern.COMMA.split(boostDef);
this.queryFields = null; // empty cached qf
this.fieldBoosts.clear(); this.fieldBoosts.clear();
for (String boost: bf) { for (String boost: bf) {
int p = boost.indexOf('^'); int p = boost.indexOf('^');

@ -40,8 +40,6 @@ import javax.servlet.http.HttpServletResponse;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter; import net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter;
@ -65,6 +63,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.params.MultiMapSolrParams;
import static org.apache.solr.common.params.MultiMapSolrParams.addParam;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
@ -206,32 +205,16 @@ public class SolrSelectServlet extends HttpServlet {
if (connector == null) throw new ServletException("no core"); if (connector == null) throw new ServletException("no core");
// add default queryfield parameter according to local ranking config (or defaultfield) // add default queryfield parameter according to local ranking config (or defaultfield)
if (!mmsp.getMap().containsKey(DisMaxParams.QF) && !mmsp.getMap().containsKey(CommonParams.DF)) { if (ranking != null) { // ranking normally never null
final String qf = ranking.getQueryFields();
if (ranking != null && defaultConnector) { // ranking normally never null if (qf.length() > 4) { // make sure qf has content (else use df)
// construct the queryfield parameter addParam(DisMaxParams.QF, qf, mmsp.getMap()); // add QF that we set to be best suited for our index
StringBuilder qf = new StringBuilder(80); // TODO: if every peer applies a decent QF itself, this can be reverted to getMap().put()
for (Map.Entry<SchemaDeclaration, Float> entry : ranking.getBoostMap()) {
SchemaDeclaration field = entry.getKey();
final Float boost = entry.getValue();
if (field.getType() == SolrType.num_integer) {
continue;
}
qf.append(field.getSolrFieldName());
if (boost != null) {
qf.append('^').append(boost.toString()).append(' ');
} else {
qf.append("^1.0 ");
}
}
if (qf.length() > 4) // make sure qf has content (else use df)
mmsp.getMap().put(DisMaxParams.QF, new String[]{qf.toString()});
else
mmsp.getMap().put(CommonParams.DF, new String[]{CollectionSchema.text_t.getSolrFieldName()});
} else { } else {
mmsp.getMap().put(CommonParams.DF, new String[]{CollectionSchema.text_t.getSolrFieldName()}); mmsp.getMap().put(CommonParams.DF, new String[]{CollectionSchema.text_t.getSolrFieldName()});
} }
} else {
mmsp.getMap().put(CommonParams.DF, new String[]{CollectionSchema.text_t.getSolrFieldName()});
} }
// do the solr request, generate facets if we use a special YaCy format // do the solr request, generate facets if we use a special YaCy format

@ -27,15 +27,11 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import net.yacy.cora.document.WordCache; import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.federate.solr.SchemaDeclaration;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector; import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.order.NaturalOrder; import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
@ -336,28 +332,11 @@ public class QueryGoal {
// add goal query // add goal query
StringBuilder w = getGoalQuery(); StringBuilder w = getGoalQuery();
// combine these queries for all relevant fields
if (w.length() > 0) { if (w.length() > 0) {
q.append(" AND ("); q.append(" AND (");
int wc = 0; q.append(w);
Float boost;
Ranking r = configuration.getRanking(rankingProfile);
for (Map.Entry<SchemaDeclaration,Float> entry: r.getBoostMap()) {
SchemaDeclaration field = entry.getKey();
boost = entry.getValue();
if (boost == null || boost.floatValue() <= 0.0f) continue;
if (configuration != null && !configuration.contains(field.getSolrFieldName())) continue;
if (field.getType() == SolrType.num_integer) continue;
if (wc > 0) q.append(" OR ");
q.append('(');
q.append(field.getSolrFieldName()).append(':').append(w);
if (boost != null) q.append('^').append(boost.toString());
q.append(')');
wc++;
}
q.append(')'); q.append(')');
} }
return q; return q;
} }

@ -369,11 +369,13 @@ public final class QueryParams {
final SolrQuery params = getBasicParams(getFacets); final SolrQuery params = getBasicParams(getFacets);
int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0; int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0;
params.setQuery(this.queryGoal.collectionTextQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile, excludeintext_image).toString()); params.setQuery(this.queryGoal.collectionTextQueryString(this.indexSegment.fulltext().getDefaultConfiguration(), rankingProfile, excludeintext_image).toString());
Ranking ranking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile
String fq = ranking.getFilterQuery(); String fq = actRanking.getFilterQuery();
String bq = ranking.getBoostQuery(); String bq = actRanking.getBoostQuery();
String bf = ranking.getBoostFunction(); String bf = actRanking.getBoostFunction();
final String qf = actRanking.getQueryFields();
if (!qf.isEmpty()) params.setParam(DisMaxParams.QF, qf);
if (this.queryGoal.getIncludeSize() > 1) { if (this.queryGoal.getIncludeSize() > 1) {
// add boost on combined words // add boost on combined words
if (bq.length() > 0) bq += " "; if (bq.length() > 0) bq += " ";

Loading…
Cancel
Save