From dd9cb06d250d8bbfc798c23ab8779a92018557f1 Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 4 Oct 2017 08:41:43 +0200 Subject: [PATCH] Fixed RWI distance calculation on multi words search queries. Distance was lost when storing/retrieving references to intermediate result container. Now all JUnit tests are again successfully passing! --- .../kelondro/data/word/WordReferenceRow.java | 13 ++++++++--- .../kelondro/data/word/WordReferenceVars.java | 22 +++++++++++++++++-- .../kelondro/rwi/ReferenceContainerTest.java | 14 +++++++----- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index e80f2a9a7..ac10466f2 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -98,7 +98,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef private static final int col_posintext = 15; // t 2 first appearance of word in text private static final int col_posinphrase = 16; // r 1 position of word in its phrase private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears - private static final int col_reserve1 = 18; // i 1 reserve1 + private static final int col_worddistance = 18; // i avg distance of search query words private static final int col_reserve2 = 19; // k 1 reserve2 // appearance flags, used in RWI entry @@ -130,6 +130,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef final char doctype, // type of document final int outlinksSame, // outlinks to same domain final int outlinksOther, // outlinks to other domain + final int wordDistance, // average distance of multi search query words final Bitfield flags // attributes to the url and to the word according the url ) { @@ -155,7 +156,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef 
this.entry.setCol(col_posintext, posintext); this.entry.setCol(col_posinphrase, posinphrase); this.entry.setCol(col_posofphrase, posofphrase); - this.entry.setCol(col_reserve1, 0); + this.entry.setCol(col_worddistance, wordDistance); this.entry.setCol(col_reserve2, 0); } @@ -194,7 +195,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.entry.setCol(col_lother, outlinksOther); this.entry.setCol(col_urlLength, urlLength); this.entry.setCol(col_urlComps, urlComps); - this.entry.setCol(col_reserve1, 0); + this.entry.setCol(col_worddistance, 0); this.entry.setCol(col_reserve2, 0); } @@ -271,6 +272,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef int pos = (int) this.entry.getColLong(col_posintext); return pos; } + + @Override + public int distance() { + final int distance = (int) this.entry.getColLong(col_worddistance); + return distance; + } /** * positions() is used to remember word positions for each query word of an diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index ae356fd4e..c045b0ff4 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -31,6 +31,7 @@ import java.util.Comparator; import java.util.Queue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; + import net.yacy.cora.date.MicroDate; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; @@ -66,6 +67,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc posinphrase, posofphrase, urlcomps, urllength, wordsintext, wordsintitle; + + /** Stored average words distance, when it can not be processed from positions because created from a WordReferenceRow instance */ + private int distance; private int virtualAge; private Queue positions; // word positons of 
joined references private double termFrequency; @@ -109,6 +113,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc } else { this.positions = null; } + this.distance = 0; // stored distance value is set to zero here because it has to be calculated from positions this.posinphrase = posinphrase; this.posintext = posintext; this.posofphrase = posofphrase; @@ -139,6 +144,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc } else { this.positions = null; } + this.distance = e.distance(); this.posinphrase = e.posinphrase(); this.posintext = e.posintext(); this.posofphrase = e.posofphrase(); @@ -165,6 +171,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.lother = 0; this.phrasesintext = 0; this.positions = null; + this.distance = 0; this.posinphrase = 0; this.posintext = 0; this.posofphrase = 0; @@ -275,6 +282,16 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc public Collection positions() { return this.positions; } + + @Override + public int distance() { + int value = super.distance(); + if(value == 0) { + /* Calculation from positions returned 0 : let's try with the stored value */ + value = this.distance; + } + return value; + } @Override public int posofphrase() { @@ -299,6 +316,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.type, // type of document this.llocal, // outlinks to same domain this.lother, // outlinks to other domain + this.distance(), // average distance of multi search query words this.flags // attributes to the url and to the word according the url ); } @@ -376,7 +394,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (this.posintext > (v = other.posintext)) this.posintext = v; // calculate and remember min distance - if (this.positions != null || other.positions != null) { + if (this.distance() > 0 || other.distance() > 0) { 
int odist = other.distance(); int dist = this.distance(); if (odist > 0 && odist < dist) { @@ -413,7 +431,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (this.posintext < (v = other.posintext)) this.posintext = v; // calculate and remember max distance - if (this.positions != null || other.positions != null) { + if (this.distance() > 0 || other.distance() > 0) { int odist = other.distance(); int dist = this.distance(); if (odist > 0 && odist > dist) { diff --git a/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java index 646b55f6e..6058522eb 100644 --- a/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java +++ b/test/java/net/yacy/kelondro/rwi/ReferenceContainerTest.java @@ -19,8 +19,15 @@ */ package net.yacy.kelondro.rwi; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + import java.util.Queue; import java.util.concurrent.LinkedBlockingQueue; + +import org.junit.Test; + import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.crawler.retrieval.Response; @@ -29,10 +36,6 @@ import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceFactory; import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.util.Bitfield; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import org.junit.Test; /** * Unit tests for ReferenceContainer class. 
@@ -50,7 +53,7 @@ public class ReferenceContainerTest { ReferenceFactory wordReferenceFactory = new WordReferenceFactory(); byte[] termHash = Word.word2hash("test"); - ReferenceContainer rc = new ReferenceContainer(wordReferenceFactory, termHash); + ReferenceContainer rc = new ReferenceContainer(wordReferenceFactory, termHash); // prepare a WordReference to be added to the container DigestURL url = new DigestURL("http://test.org/test.html"); @@ -89,7 +92,6 @@ public class ReferenceContainerTest { assertNotNull("getReference failed", wc); - // TODO: ReferenceContainer used for rwi results. As distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix System.out.println("-----------------------------------------------------------"); System.out.println("WordReference (word distance) before add to container: " + wentry.distance()); System.out.println("WordReference (word distance) after get from container: " + wc.distance());