Better handling of ranking parameters and new default values for date navigation, which is done using ranking in Solr.
pull/1/head
Michael Peter Christen 11 years ago
parent 53948da7d0
commit f0db501630

@ -992,19 +992,19 @@ search.ranking.rwi.profile =
# All boost methods with an index > 0 must have a name, so that the method can be selected in a query using the syntax /name
search.ranking.solr.collection.boostname.tmpa.0=Default Profile
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0
search.ranking.solr.collection.boostquery.tmpa.0=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.0=
search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
search.ranking.solr.collection.boostfields.tmpa.1=text_t^1.0
search.ranking.solr.collection.boostquery.tmpa.1=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.1=recip(rord(last_modified),1,1000,1000)
search.ranking.solr.collection.boostquery.tmpa.1=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1)
search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e. if a site:-operator is used
search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0
search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0
search.ranking.solr.collection.boostfunction.tmpb.2=
search.ranking.solr.collection.boostname.tmpa.3=_unused3
search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
search.ranking.solr.collection.boostquery.tmpa.3=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8 crawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.3=
# the following values are used to identify duplicate content

@ -479,15 +479,15 @@ public class IndexControlRWIs_p {
DigestURL url;
URIMetadataNode entry;
String us;
long rn = -1;
float rn = Float.MIN_VALUE;
while (!theSearch.rwiIsEmpty() && (entry = theSearch.pullOneFilteredFromRWI(false)) != null) {
url = entry.url();
if ( url == null ) {
continue;
}
us = url.toNormalform(true);
if ( rn == -1 ) {
rn = entry.ranking();
if ( rn == Float.MIN_VALUE ) {
rn = entry.score();
}
prop.put("genUrlList_urlList_" + i + "_urlExists", "1");
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxCount", i);
@ -497,7 +497,7 @@ public class IndexControlRWIs_p {
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlString", us);
prop.put("genUrlList_urlList_" + i + "_urlExists_urlStringShort",
(us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(20, 40) + "...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + us.substring(20)) : us));
prop.putNum("genUrlList_urlList_" + i + "_urlExists_ranking", (entry.ranking() - rn));
prop.putNum("genUrlList_urlList_" + i + "_urlExists_ranking", Float.toString(entry.score() - rn));
prop.putNum("genUrlList_urlList_" + i + "_urlExists_domlength", DigestURL.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_" + i + "_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_" + i + "_urlExists_authority", (theSearch.getOrder() == null) ? -1 : theSearch.getOrder().authority(ASCII.String(entry.hash(), 6, 6)));

@ -25,7 +25,7 @@
A Boost Function can combine numeric values from the result document to produce a number which is multiplied with the score value from the query result.
To see all available fields, see the <a href="IndexSchema_p.html">YaCy Solr Schema</a> and look for numeric values (these are names with suffix '_i').
To find out which kind of operations are possible, see the <a href="http://wiki.apache.org/solr/FunctionQuery" target="_blank">Solr Function Query</a> documentation.
Example: to order by date, use "recip(rord(last_modified),1,1000,1000)", to order by clickdepth, use "div(100,add(clickdepth_i,1))".
Example: to order by date, use "recip(ms(NOW,last_modified),3.16e-11,1,1)", to order by crawldepth, use "div(100,add(crawldepth_i,1))".
<dl>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bf" id="bf_label">boost</label></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;" id="bf_dd">

@ -195,7 +195,7 @@ public class yacysearchitem {
if (faviconURL != null && fileType == FileType.HTML) sb.loader.loadIfNotExistBackground(faviconURL, 1024 * 1024 * 10, null, ClientIdentification.yacyIntranetCrawlerAgent);
prop.putHTML("content_faviconCode", URLLicense.aquireLicense(faviconURL)); // acquire license for favicon url loading
prop.put("content_urlhash", resulthashString);
prop.put("content_ranking", result.ranking());
prop.put("content_ranking", Float.toString(result.score()));
prop.put("content_showMetadata_urlhash", resulthashString);
prop.put("content_showCache_link", resultUrlstring);
prop.put("content_showProxy_link", resultUrlstring);

@ -287,7 +287,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return this.drained.iterator();
}
public interface Element<E> extends Serializable {
public interface Element<E> extends Serializable, Comparable<Element<E>>, Comparator<Element<E>> {
public long getWeight();
public E getElement();
public boolean equals(Element<E> o);
@ -295,6 +295,10 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
public int hashCode();
@Override
public String toString();
@Override
public int compare(Element<E> o1, Element<E> o2);
@Override
public int compareTo(Element<E> o);
}
private abstract static class AbstractElement<E> implements Element<E>, Serializable {
@ -334,7 +338,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* natural ordering elements, can be used as container of objects <E> in the priority queue
* the elements with smallest ordering weights are first in the queue when elements are taken
*/
public static class NaturalElement<E> extends AbstractElement<E> implements Element<E>, Comparable<NaturalElement<E>>, Comparator<NaturalElement<E>> {
public static class NaturalElement<E> extends AbstractElement<E> implements Element<E>, Comparable<Element<E>>, Comparator<Element<E>> {
private static final long serialVersionUID = 6816543012966928794L;
@ -344,12 +348,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
}
@Override
public int compare(NaturalElement<E> o1, NaturalElement<E> o2) {
public int compare(Element<E> o1, Element<E> o2) {
return o1.compareTo(o2);
}
@Override
public int compareTo(NaturalElement<E> o) {
public int compareTo(Element<E> o) {
if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0;
if (this.weight > o.getWeight()) return 1;
@ -367,7 +371,7 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* reverse ordering elements, can be used as container of objects <E> in the priority queue
* the elements with highest ordering weights are first in the queue when elements are taken
*/
public static class ReverseElement<E> extends AbstractElement<E> implements Element<E>, Comparable<ReverseElement<E>>, Comparator<ReverseElement<E>> {
public static class ReverseElement<E> extends AbstractElement<E> implements Element<E>, Comparable<Element<E>>, Comparator<Element<E>> {
private static final long serialVersionUID = -8166724491837508921L;
@ -377,12 +381,12 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
}
@Override
public int compare(ReverseElement<E> o1, ReverseElement<E> o2) {
public int compare(Element<E> o1, Element<E> o2) {
return o1.compareTo(o2);
}
@Override
public int compareTo(ReverseElement<E> o) {
public int compareTo(Element<E> o) {
if (this.element == o.getElement()) return 0;
if (this.element.equals(o.getElement())) return 0;
if (this.weight > o.getWeight()) return -1;

@ -72,7 +72,7 @@ public class URIMetadataNode extends SolrDocument {
protected Bitfield flags = null;
protected int imagec = -1, audioc = -1, videoc = -1, appc = -1;
protected double lat = Double.NaN, lon = Double.NaN;
protected long ranking = 0; // during generation of a search result this value is set
protected float score = 0; // during generation of a search result this value is set
protected String snippet = null;
protected WordReferenceVars word = null; // this is only used if the url is transported via remote search requests
@ -139,6 +139,7 @@ public class URIMetadataNode extends SolrDocument {
this.videoc = Integer.parseInt(prop.getProperty("lvideo", "0"));
this.appc = Integer.parseInt(prop.getProperty("lapp", "0"));
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
this.score = Float.parseFloat(prop.getProperty("score", "0.0"));
this.word = null;
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
@ -151,8 +152,8 @@ public class URIMetadataNode extends SolrDocument {
this.addField(name, doc.getFieldValue(name));
}
this.snippet = "";
Float score = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
this.ranking = score == null ? 0 : (long) (1000000.0f * score.floatValue()); // solr score values are sometimes very low
Float scorex = (Float) doc.getFieldValue("score"); // this is a special field containing the ranking score of a search result
this.score = scorex == null ? 0.0f : scorex.floatValue();
this.hash = ASCII.getBytes(getString(CollectionSchema.id));
this.urlRaw = getString(CollectionSchema.sku);
try {
@ -163,10 +164,10 @@ public class URIMetadataNode extends SolrDocument {
}
}
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final long ranking) {
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final float scorex) {
this(doc);
this.word = searchedWord;
this.ranking = ranking;
this.score = scorex;
}
/**
@ -254,8 +255,8 @@ public class URIMetadataNode extends SolrDocument {
return this.lon;
}
public long ranking() {
return this.ranking;
public float score() {
return this.score;
}
public Date loaddate() {
@ -467,6 +468,7 @@ public class URIMetadataNode extends SolrDocument {
s.append(",laudio=").append(this.laudio());
s.append(",lvideo=").append(this.lvideo());
s.append(",lapp=").append(this.lapp());
s.append(",score=").append(Float.toString(this.score()));
if (this.word() != null) {
// append also word properties
final String wprop = this.word().toPropertyForm();

@ -276,24 +276,24 @@ public final class Fulltext {
if (element == null) return null;
WordReferenceVars wre = element.getElement();
if (wre == null) return null; // all time was already wasted in takeRWI to get another element
long weight = element.getWeight();
URIMetadataNode node = getMetadata(wre.urlhash(), wre, weight);
float score = element.getWeight();
URIMetadataNode node = getMetadata(wre.urlhash(), wre, score);
return node;
}
public URIMetadataNode getMetadata(final byte[] urlHash) {
if (urlHash == null) return null;
return getMetadata(urlHash, null, 0);
return getMetadata(urlHash, null, 0.0f);
}
private URIMetadataNode getMetadata(final byte[] urlHash, final WordReferenceVars wre, final long weight) {
private URIMetadataNode getMetadata(final byte[] urlHash, final WordReferenceVars wre, final float score) {
String u = ASCII.String(urlHash);
// get the metadata from Solr
try {
SolrDocument doc = this.getDefaultConnector().getDocumentById(u);
if (doc != null) {
return new URIMetadataNode(doc, wre, weight);
return new URIMetadataNode(doc, wre, score);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);

@ -907,7 +907,7 @@ public final class SearchEvent {
this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
try {
long score = iEntry.ranking();
long score = (long) (1000000.0f * iEntry.score());
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
@ -1291,8 +1291,8 @@ public final class SearchEvent {
*/
public void addResult(ResultEntry resultEntry) {
if (resultEntry == null) return;
long ranking = resultEntry.ranking();
ranking += postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
float score = resultEntry.score();
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
this.resultList.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.addTopics(resultEntry);

@ -229,7 +229,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
public int compare(ResultEntry o1, ResultEntry o2) {
return Base64Order.enhancedCoder.compare(o1.urlentry.hash(), o2.urlentry.hash());
}
public long ranking() {
return this.urlentry.ranking();
public float score() {
return this.urlentry.score();
}
}

Loading…
Cancel
Save