From e68b00678e9422bb39d4f6a1f0ad59277f405d69 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 11 Oct 2016 19:54:50 +0200 Subject: [PATCH] prevent negative score on URIMetadataNode - in the special case where no solr score is supplied. + assert before use & test case --- .../yacy/search/ranking/ReferenceOrder.java | 11 ++-- .../search/ranking/ReferenceOrderTest.java | 52 +++++++++++++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 test/java/net/yacy/search/ranking/ReferenceOrderTest.java diff --git a/source/net/yacy/search/ranking/ReferenceOrder.java b/source/net/yacy/search/ranking/ReferenceOrder.java index 7e7376cbc..5e95f7dac 100644 --- a/source/net/yacy/search/ranking/ReferenceOrder.java +++ b/source/net/yacy/search/ranking/ReferenceOrder.java @@ -221,11 +221,11 @@ public class ReferenceOrder { public long cardinal(final WordReference t) { //return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords); // the normalizedEntry must be a normalized indexEntry - final Bitfield flags = t.flags(); assert this.min != null; assert this.max != null; assert t != null; assert this.ranking != null; + final Bitfield flags = t.flags(); final long tf = ((this.max.termFrequency() == this.min.termFrequency()) ? 
0 : (((int)(((t.termFrequency()-this.min.termFrequency())*256.0)/(this.max.termFrequency() - this.min.termFrequency())))) << this.ranking.coeff_termfrequency); //System.out.println("tf(" + t.urlHash + ") = " + Math.floor(1000 * t.termFrequency()) + ", min = " + Math.floor(1000 * min.termFrequency()) + ", max = " + Math.floor(1000 * max.termFrequency()) + ", tf-normed = " + tf); final int maxmaxpos = this.max.maxposition(); // returns Integer.MIN_VALUE if positions empty @@ -266,19 +266,20 @@ public class ReferenceOrder { } public long cardinal(final URIMetadataNode t) { - //return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords); // the normalizedEntry must be a normalized indexEntry - final Bitfield flags = t.flags(); assert t != null; assert this.ranking != null; - final long r = + final Bitfield flags = t.flags(); + long r = ((256 - DigestURL.domLengthNormalized(t.hash())) << this.ranking.coeff_domlength) - + ((256 - (t.urllength() << 8)) << this.ranking.coeff_urllength) + // TODO: here we score currently absolute numbers (e.g. t.urllength() : (35 << coeff), in contrast rwi calculation is ((between min=0, max=255) << coeff) for each of the score factors + // + ((256 - (t.urllength() << 8)) << this.ranking.coeff_urllength) // TODO: this is for valid url always NEGATIVE + (t.virtualAge() << this.ranking.coeff_date) + (t.wordsintitle()<< this.ranking.coeff_wordsintitle) + (t.wordCount() << this.ranking.coeff_wordsintext) + (t.llocal() << this.ranking.coeff_llocal) + (t.lother() << this.ranking.coeff_lother) + // + ((this.ranking.coeff_authority > 12) ? (authority(t.hosthash()) << this.ranking.coeff_authority) : 0) + ((flags.get(WordReferenceRow.flag_app_dc_identifier)) ? 255 << this.ranking.coeff_appurl : 0) + ((flags.get(WordReferenceRow.flag_app_dc_title)) ? 
255 << this.ranking.coeff_app_dc_title : 0) diff --git a/test/java/net/yacy/search/ranking/ReferenceOrderTest.java b/test/java/net/yacy/search/ranking/ReferenceOrderTest.java new file mode 100644 index 000000000..45036de3e --- /dev/null +++ b/test/java/net/yacy/search/ranking/ReferenceOrderTest.java @@ -0,0 +1,52 @@ +package net.yacy.search.ranking; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import net.yacy.cora.document.analysis.Classification; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.util.SpaceExceededException; +import net.yacy.kelondro.data.meta.URIMetadataNode; +import net.yacy.search.schema.CollectionConfiguration; +import org.apache.solr.common.SolrInputDocument; +import static org.junit.Assert.assertTrue; +import org.junit.Test; + +public class ReferenceOrderTest { + + + /** + * Test of cardinal method for URIMetadataNodes, of class ReferenceOrder. + * (only used if no Solr score supplied). Checks that the score computed with the default text ranking profile is not lower than the score computed with an all-zero ranking profile. + */ + @Test + public void testCardinal_URIMetadataNode() throws MalformedURLException, IOException, SpaceExceededException { + File config = new File("defaults/solr.collection.schema"); + CollectionConfiguration cc = new CollectionConfiguration(config, true); + + /** + * simple test of score result with default and zero ranking coefficient + */ + RankingProfile rpText = new RankingProfile(Classification.ContentDomain.TEXT); // default text profile + RankingProfile rpZero = new RankingProfile(Classification.ContentDomain.TEXT); + rpZero.allZero(); // sets all ranking factors to 0 + + ReferenceOrder roText = new ReferenceOrder(rpText, "xx"); // use unknown language + ReferenceOrder roZero = new ReferenceOrder(rpZero, "xx"); // use unknown language + + DigestURL url = new DigestURL("http://test.org/index.html"); + URIMetadataNode uri = new URIMetadataNode(url); + + // to simulate document retrieved from index, follow transformation as in storeToIndex + SolrInputDocument sid = cc.metadata2solr(uri); 
+ // generate a node for further testing + URIMetadataNode testuri = new URIMetadataNode(cc.toSolrDocument(sid)); + + long scoreText = roText.cardinal(testuri); // score with text profile + long scoreZero = roZero.cardinal(testuri); // score 0-profile + + assertTrue("Zero-Score larger as Text-Score", scoreText >= scoreZero); + + } + +}