- added a new solr field references_i which stores the number of

INCOMING links to the corresponding web page. This information is taken
from the reverse link index (a 'little sister' of the RWI index).
- this field can be of use to enhance the ranking because a web page
with more incoming links can be more important than others. But
this is not true for typical link pages like menus. Therefore the
number of outgoing links is needed.
- added a new solr attribute 'bf' to solr queries which is a boost
function extension. This attribute can contain a formula which computes the
boost according to given field values. After some experiments the
following formula is now default:
div(add(1,references_i),pow(add(1,inboundlinkscount_i),1.6))^0.4
This takes the number of references and the inbound links. Further
experiments are needed to enhance that formula.
pull/1/head
Michael Peter Christen 12 years ago
parent 7c3de8b4cd
commit 1052263af3

@ -65,6 +65,9 @@ httpstatus_i
## redirect url if the error code is 299 < httpstatus_i < 310
#httpstatus_redirect_s
## number of unique http references; used for ranking
references_i
### optional but highly recommended values, part of the index distribution process
## time when resource was loaded

@ -119,8 +119,8 @@ public class searchresult {
post.put(CommonParams.Q, solrQ.toString());
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b);
post.put("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back
post.put("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
post.put("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
post.put(CommonParams.FL,
YaCySchema.content_type.getSolrFieldName() + ',' +
YaCySchema.id.getSolrFieldName() + ',' +

@ -46,7 +46,8 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
YaCySchema.description,
YaCySchema.keywords,
YaCySchema.text_t,
YaCySchema.synonyms_sxt
YaCySchema.synonyms_sxt,
YaCySchema.references_i
};
// for minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLen = 3 the quantRate value must be not below 0.5!
@ -118,4 +119,20 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
return minTokenLen;
}
/**
 * Builds the string that is passed to Solr as the 'bq' (boost query) attribute.
 * <p>
 * The result has the form {@code fieldname:true^boost}, where {@code fieldname}
 * is the Solr field name of {@link YaCySchema#fuzzy_signature_unique_b} and
 * {@code boost} is the value stored in this map for that same schema field.
 *
 * @return the boost query string
 * @throws NullPointerException if this map holds no value for
 *         {@code fuzzy_signature_unique_b} (the stored Float is unboxed)
 */
public String getBoostQuery() {
    final String fieldName = YaCySchema.fuzzy_signature_unique_b.getSolrFieldName();
    // unboxing here keeps the failure mode of a missing entry identical to a direct Float.toString(float) call
    final float boost = this.get(YaCySchema.fuzzy_signature_unique_b);
    return fieldName + ":true^" + Float.toString(boost);
}
/**
 * Supplies the string that is passed to Solr as the 'bf' (boost function) attribute.
 * <p>
 * The formula is currently fixed and does not depend on the content of this map.
 * It divides {@code 1 + references_i} by {@code (1 + inboundlinkscount_i)} raised
 * to the power 1.6. NOTE(review): the trailing {@code ^0.4} is presumably the
 * weight Solr applies to the function result - confirm against the Solr
 * documentation of the bf parameter.
 *
 * @return the boost function expression
 */
public String getBoostFunction() {
    final String formula = "div(add(1,references_i),pow(add(1,inboundlinkscount_i),1.6))^0.4";
    return formula;
}
}

@ -47,6 +47,7 @@ public enum YaCySchema implements Schema {
failtype_s(SolrType.string, true, true, false, "fail type if a page was not loaded. This field is either empty, 'excl' or 'fail'"),
httpstatus_i(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_redirect_s(SolrType.num_integer, true, true, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded"),
references_i(SolrType.num_integer, true, true, false, "number of unique http references; used for ranking"),
// optional but recommended, part of index distribution
load_date_dt(SolrType.date, true, true, false, "time when resource was loaded"),

@ -383,6 +383,13 @@ public class Segment {
}
}
// ENRICH DOCUMENT WITH RANKING INFORMATION
if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) {
int references = this.urlCitationIndex.count(url.hash());
if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references);
}
// STORE TO SOLR
String error = null;
tryloop: for (int i = 0; i < 20; i++) {

@ -428,8 +428,8 @@ public final class QueryParams {
// construct query
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
float f = Boost.RANKING.get(YaCySchema.fuzzy_signature_unique_b);
params.setParam("bq", YaCySchema.fuzzy_signature_unique_b.getSolrFieldName() + ":true^" + Float.toString(f)); // a boost query that moves double content to the back
params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
params.setStart(this.offset);
params.setRows(this.itemsPerPage);
params.setFacet(false);

Loading…
Cancel
Save