Fixed RWI distance calculation on multi-word search queries.

The distance was lost when storing references into, or retrieving them
from, the intermediate result container.

Now all JUnit tests are again successfully passing!
pull/135/head
luccioman 7 years ago
parent fcea6def72
commit dd9cb06d25

@ -98,7 +98,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
private static final int col_posintext = 15; // t 2 first appearance of word in text
private static final int col_posinphrase = 16; // r 1 position of word in its phrase
private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears
private static final int col_reserve1 = 18; // i 1 reserve1
private static final int col_worddistance = 18; // i avg distance of search query words
private static final int col_reserve2 = 19; // k 1 reserve2
// appearance flags, used in RWI entry
@ -130,6 +130,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
final char doctype, // type of document
final int outlinksSame, // outlinks to same domain
final int outlinksOther, // outlinks to other domain
final int wordDistance, // average distance of multi search query words
final Bitfield flags // attributes to the url and to the word according the url
) {
@ -155,7 +156,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_posintext, posintext);
this.entry.setCol(col_posinphrase, posinphrase);
this.entry.setCol(col_posofphrase, posofphrase);
this.entry.setCol(col_reserve1, 0);
this.entry.setCol(col_worddistance, wordDistance);
this.entry.setCol(col_reserve2, 0);
}
@ -194,7 +195,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
this.entry.setCol(col_urlComps, urlComps);
this.entry.setCol(col_reserve1, 0);
this.entry.setCol(col_worddistance, 0);
this.entry.setCol(col_reserve2, 0);
}
@ -272,6 +273,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return pos;
}
/**
 * Reads the word distance kept in the {@code col_worddistance} column of this row entry.
 *
 * @return the column value, narrowed from long to int
 */
@Override
public int distance() {
    return (int) this.entry.getColLong(col_worddistance);
}
/**
* positions() is used to remember word positions for each query word of a
* multi-word search query.

@ -31,6 +31,7 @@ import java.util.Comparator;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.date.MicroDate;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -66,6 +67,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
posinphrase, posofphrase,
urlcomps, urllength,
wordsintext, wordsintitle;
/** Stored average word distance, used when it cannot be computed from positions because this instance was created from a WordReferenceRow */
private int distance;
private int virtualAge;
private Queue<Integer> positions; // word positons of joined references
private double termFrequency;
@ -109,6 +113,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
} else {
this.positions = null;
}
this.distance = 0; // stored distance value is set to zero here because it has to be calculated from positions
this.posinphrase = posinphrase;
this.posintext = posintext;
this.posofphrase = posofphrase;
@ -139,6 +144,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
} else {
this.positions = null;
}
this.distance = e.distance();
this.posinphrase = e.posinphrase();
this.posintext = e.posintext();
this.posofphrase = e.posofphrase();
@ -165,6 +171,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.lother = 0;
this.phrasesintext = 0;
this.positions = null;
this.distance = 0;
this.posinphrase = 0;
this.posintext = 0;
this.posofphrase = 0;
@ -276,6 +283,16 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return this.positions;
}
/**
 * Returns the word distance of this reference.
 * The value provided by the superclass takes precedence; when that value is
 * zero, the distance stored in this instance is returned instead.
 *
 * @return the superclass distance when non-zero, otherwise the stored distance
 */
@Override
public int distance() {
    final int computed = super.distance();
    /* A zero result from the superclass calculation means we fall back to the stored value */
    return computed != 0 ? computed : this.distance;
}
@Override
public int posofphrase() {
return this.posofphrase;
@ -299,6 +316,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.type, // type of document
this.llocal, // outlinks to same domain
this.lother, // outlinks to other domain
this.distance(), // // average distance of multi search query words
this.flags // attributes to the url and to the word according the url
);
}
@ -376,7 +394,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.posintext > (v = other.posintext)) this.posintext = v;
// calculate and remember min distance
if (this.positions != null || other.positions != null) {
if (this.distance() > 0 || other.distance() > 0) {
int odist = other.distance();
int dist = this.distance();
if (odist > 0 && odist < dist) {
@ -413,7 +431,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.posintext < (v = other.posintext)) this.posintext = v;
// calculate and remember max distance
if (this.positions != null || other.positions != null) {
if (this.distance() > 0 || other.distance() > 0) {
int odist = other.distance();
int dist = this.distance();
if (odist > 0 && odist > dist) {

@ -19,8 +19,15 @@
*/
package net.yacy.kelondro.rwi;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;
import org.junit.Test;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.crawler.retrieval.Response;
@ -29,10 +36,6 @@ import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.util.Bitfield;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
/**
* Unit tests for ReferenceContainer class.
@ -50,7 +53,7 @@ public class ReferenceContainerTest {
ReferenceFactory<WordReference> wordReferenceFactory = new WordReferenceFactory();
byte[] termHash = Word.word2hash("test");
ReferenceContainer<WordReference> rc = new ReferenceContainer(wordReferenceFactory, termHash);
ReferenceContainer<WordReference> rc = new ReferenceContainer<WordReference>(wordReferenceFactory, termHash);
// prepare a WordReference to be added to the container
DigestURL url = new DigestURL("http://test.org/test.html");
@ -89,7 +92,6 @@ public class ReferenceContainerTest {
assertNotNull("getReference failed", wc);
// TODO: ReferenceContainer used for rwi results. As distance doesn't persist after adding ref to container making the distance ranking obsolete -> remove or fix
System.out.println("-----------------------------------------------------------");
System.out.println("WordReference (word distance) before add to container: " + wentry.distance());
System.out.println("WordReference (word distance) after get from container: " + wc.distance());

Loading…
Cancel
Save