From 3c7220bc7b2e5f6d014083e755f468e5770ed023 Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 23 Oct 2016 19:40:02 +0200 Subject: [PATCH] Refactor rwi reference word position and word distance calculation used for rwi ranking. Main changes: - introduce a posintext() to access the stored value. This also reduces memory allocation of the position array for WordReferenceRow (index access) - use the positions() array for joined references on multi-word queries if needed (otherwise allow positions() to be null) - adjust assignments and the min(), max() and distance() calculations accordingly --- htroot/IndexControlRWIs_p.java | 2 +- .../data/citation/CitationReference.java | 9 +- .../navigation/NavigationReferenceRow.java | 5 + .../navigation/NavigationReferenceVars.java | 4 + .../kelondro/data/word/WordReferenceRow.java | 25 ++-- .../kelondro/data/word/WordReferenceVars.java | 107 ++++++++++++------ .../yacy/kelondro/rwi/AbstractReference.java | 91 +-------------- source/net/yacy/kelondro/rwi/Reference.java | 27 ++--- .../peers/graphics/WebStructureGraph.java | 8 +- .../yacy/search/ranking/ReferenceOrder.java | 17 ++- .../data/word/WordReferenceVarsTest.java | 23 ++-- .../net/yacy/search/index/SegmentTest.java | 15 +-- 12 files changed, 146 insertions(+), 187 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index baf170002..56521dbcd 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -519,7 +519,7 @@ public class IndexControlRWIs_p { prop.putNum("genUrlList_urlList_" + i + "_urlExists_lother", entry.word().lother()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_hitcount", entry.word().hitcount()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_worddistance", 0); - prop.putNum("genUrlList_urlList_" + i + "_urlExists_pos", entry.word().minposition()); + prop.putNum("genUrlList_urlList_" + i + "_urlExists_pos", entry.word().posintext()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_phrase", 
entry.word().posofphrase()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_posinphrase", entry.word().posinphrase()); prop.putNum("genUrlList_urlList_" + i + "_urlExists_urlcomps", entry.word().urlcomps()); diff --git a/source/net/yacy/kelondro/data/citation/CitationReference.java b/source/net/yacy/kelondro/data/citation/CitationReference.java index a72ed2fe8..f9be7b728 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReference.java +++ b/source/net/yacy/kelondro/data/citation/CitationReference.java @@ -156,17 +156,12 @@ public class CitationReference implements Reference, Serializable { } @Override - public int maxposition() { - throw new UnsupportedOperationException(); - } - - @Override - public int minposition() { + public Collection positions() { throw new UnsupportedOperationException(); } @Override - public Collection positions() { + public int posintext() { throw new UnsupportedOperationException(); } diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java index 827da7f01..b7296937d 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java @@ -183,4 +183,9 @@ public final class NavigationReferenceRow extends AbstractReference implements N throw new UnsupportedOperationException(); } + @Override + public int posintext() { + throw new UnsupportedOperationException(); + } + } diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java index a0bc1db86..35ffdcf4a 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java @@ -168,4 +168,8 @@ public class NavigationReferenceVars extends AbstractReference implements Navig throw new UnsupportedOperationException(); 
} + @Override + public int posintext() { + throw new UnsupportedOperationException(); } + } diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index e90771e7c..a6e3f5e00 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -26,7 +26,6 @@ package net.yacy.kelondro.data.word; -import java.util.ArrayList; import java.util.Collection; import net.yacy.cora.date.MicroDate; @@ -250,7 +249,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef @Override public long lastModified() { - return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified)); + return MicroDate.reverseMicroDateDays(this.entry.getColLong(col_lastModified)); } @Override @@ -259,18 +258,24 @@ public final class WordReferenceRow extends AbstractReference implements WordRef } /** - * First position of word in text. + * @return first position of word in text + */ + @Override + public int posintext() { + int pos = (int) this.entry.getColLong(col_posintext); + return pos; + } + + /** * positions() is used to remember word positions for each query word of an - * multi word search query. As we currently don't include a separate posintext() - * function, we use positions to make the posintext value available. - * @return Collection with one element + * multi word search query. + * WordReferenceRow is for one WordReference and has no means to return multiple positions + * but is required by the interface. 
+ * @return null */ @Override public Collection positions() { - int pos = (int) this.entry.getColLong(col_posintext); - ArrayList arr = new ArrayList(1); - arr.add(pos); - return arr; + return null; } @Override diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index e15a74f92..77db39b8c 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -62,11 +62,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc private final char type; private int hitcount, // how often appears this word in the text llocal, lother, phrasesintext, + posintext, // word position in text posinphrase, posofphrase, urlcomps, urllength, wordsintext, wordsintitle; private int virtualAge; - private final Queue positions; + private Queue positions; // word positons of joined references private double termFrequency; private final boolean local; @@ -78,6 +79,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc final int hitcount, // how often appears this word in the text final int wordcount, // total number of words final int phrasecount, // total number of phrases + final int posintext, // first position of word in text final Queue ps, // positions of words that are joined into the reference final int posinphrase, // position of word in its phrase final int posofphrase, // number of the phrase where word appears @@ -100,9 +102,15 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.llocal = outlinksSame; this.lother = outlinksOther; this.phrasesintext = phrasecount; - this.positions = new LinkedBlockingQueue(); - if (!ps.isEmpty()) for (final Integer i: ps) this.positions.add(i); + + if (ps != null && !ps.isEmpty()) { + this.positions = new LinkedBlockingQueue(); + for (final Integer i : ps) this.positions.add(i); + } else { + 
this.positions = null; + } this.posinphrase = posinphrase; + this.posintext = posintext; this.posofphrase = posofphrase; this.urlcomps = urlComps; this.urllength = urlLength; @@ -124,9 +132,15 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.llocal = e.llocal(); this.lother = e.lother(); this.phrasesintext = e.phrasesintext(); - this.positions = new LinkedBlockingQueue(); - if (!e.positions().isEmpty()) for (final Integer i: e.positions()) this.positions.add(i); + + if (e.positions() != null && !e.positions().isEmpty()) { + this.positions = new LinkedBlockingQueue(); + for (final Integer i: e.positions()) this.positions.add(i); + } else { + this.positions = null; + } this.posinphrase = e.posinphrase(); + this.posintext = e.posintext(); this.posofphrase = e.posofphrase(); this.urlcomps = e.urlcomps(); this.urllength = e.urllength(); @@ -152,6 +166,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.phrasesintext = 0; this.positions = null; this.posinphrase = 0; + this.posintext = 0; this.posofphrase = 0; this.urlcomps = 0; this.urllength = 0; @@ -172,6 +187,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.hitcount, this.wordsintext, this.phrasesintext, + this.posintext, this.positions, this.posinphrase, this.posofphrase, @@ -234,6 +250,20 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return this.posinphrase; } + /** + * First word position in text. + * @return min position + */ + @Override + public int posintext() { + return this.posintext; + } + + /** + * Word positions for joined references (for multi word queries). 
+ * @see posintext() + * @return the word positions of the joined references + */ @Override public Collection positions() { return this.positions; @@ -253,7 +283,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.hitcount, // how often appears this word in the text this.wordsintext, // total number of words this.phrasesintext, // total number of phrases - this.positions.isEmpty() ? 0 : minposition(), // position of word in all words (WordReferenceRow stores first position in text, minpos also important for joined references) + this.posintext, // position of word in all words (WordReferenceRow stores first position in text) this.posinphrase, // position of word in its phrase this.posofphrase, // number of the phrase where word appears this.lastModified, // last-modified time of the document where word appears @@ -336,21 +366,19 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v; + if (this.posintext > (v = other.posintext)) this.posintext = v; - int minpos = min(this.positions, other.positions); - if (minpos != Integer.MAX_VALUE) { + // calculate and remember min distance + if (this.positions != null || other.positions != null) { int odist = other.distance(); int dist = this.distance(); - this.positions.clear(); // we want only the min - this.positions.add(minpos); - // handle distance for multi word queries - // distance is calculated from positions, must be at least 2 positions for calculation if (odist > 0 && odist < dist) { - this.positions.add(minpos + odist); - } else if (dist > 0) { - this.positions.add(minpos + dist); - } else if (odist > 0) { - this.positions.add(minpos + odist); + if (this.positions == null) { + this.positions = new LinkedBlockingQueue(); + } else { + 
this.positions.clear(); + } + this.positions.add(this.posintext + odist); } } @@ -375,19 +403,19 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v; + if (this.posintext < (v = other.posintext)) this.posintext = v; - int maxpos = max(this.positions, other.positions); - if (maxpos != Integer.MIN_VALUE) { + // calculate and remember max distance + if (this.positions != null || other.positions != null) { int odist = other.distance(); int dist = this.distance(); - this.positions.clear(); - this.positions.add(maxpos); - // handle distance for multi word queries - // distance is calculated from positions, must be at least 2 positions for calculation - if (odist > dist) { - this.positions.add(maxpos - odist); // special cas for max, to not be altered by the pos for distance use pos before maxpos - } else if (dist > 0) { - this.positions.add(maxpos - dist); + if (odist > 0 && odist > dist) { + if (this.positions == null) { + this.positions = new LinkedBlockingQueue(); + } else { + this.positions.clear(); + } + this.positions.add(this.posintext + odist); } } @@ -404,18 +432,27 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc /** * joins two entries into one entry * - * Main usage is on multi word searches to combine the position values for distance ranking, + * Main usage is on multi word searches to combine the position values for ranking and word distance calculation, * A Join is valid for the same url. 
* @param r WordReference */ @Override public void join(final Reference r) { - // combine the distance final WordReference oe = (WordReference) r; - this.positions.addAll(oe.positions()); - + // choose min posintext (for > 0) + if (this.posintext > 0 && oe.posintext() > 0) { + if (this.posintext > oe.posintext()) { + this.addPosition(this.posintext); // remember larger position (for distance calculation) + this.posintext = oe.posintext(); + } else { + this.addPosition(oe.posintext()); // remember other position (for distance calculation) + } + } else if (this.posintext == 0) { + this.posintext = oe.posintext(); + } + // join phrase // this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0; // this.posofphrase = Math.min(this.posofphrase, oe.posofphrase()); @@ -465,8 +502,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return o1.compareTo(o2); } + /** + * Add a position for word distance calculation to the list if position > 0 + * @param position + */ public void addPosition(final int position) { - this.positions.add(position); + if (this.positions == null && position > 0) this.positions = new LinkedBlockingQueue(); + if (position > 0) this.positions.add(position); } /** @@ -474,7 +516,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc * @param container * @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned */ - public static BlockingQueue transform(final ReferenceContainer container, final long maxtime, final boolean local) { final LinkedBlockingQueue vars = new LinkedBlockingQueue(); if (container.size() <= 100) { diff --git a/source/net/yacy/kelondro/rwi/AbstractReference.java b/source/net/yacy/kelondro/rwi/AbstractReference.java index d972b4e29..bb509c800 100644 --- a/source/net/yacy/kelondro/rwi/AbstractReference.java +++ b/source/net/yacy/kelondro/rwi/AbstractReference.java @@ -26,102 
+26,24 @@ package net.yacy.kelondro.rwi; -import java.util.Collection; import java.util.Iterator; import net.yacy.cora.util.ConcurrentLog; public abstract class AbstractReference implements Reference { - protected static int max(Collection a, Collection b) { - if (a == null || a.isEmpty()) return max(b); - if (b == null || b.isEmpty()) return max(a); - int ma = max(a); - int mb = max(b); - if (ma == Integer.MIN_VALUE) return mb; - if (mb == Integer.MIN_VALUE) return ma; - return Math.max(ma, mb); - } - - protected static int min(Collection a, Collection b) { - assert a != null; - if (a == null || a.isEmpty()) return min(b); - if (b == null || b.isEmpty()) return min(a); - int ma = min(a); - int mb = min(b); - if (ma == Integer.MAX_VALUE) return mb; - if (mb == Integer.MAX_VALUE) return ma; - return Math.min(ma, mb); - } - - private static int max(Collection a) { - if (a == null || a.isEmpty()) return Integer.MIN_VALUE; - Iterator i = a.iterator(); - /* - expirienced concurrency issue with this short cut 2016-09-06 - on i.next w/o test of hasNext before - java.util.NoSuchElementException at java.util.concurrent.LinkedBlockingQueue$Itr.next(LinkedBlockingQueue.java:828) - - if (a.size() == 1) return i.next(); - if (a.size() == 2) return Math.max(i.next(), i.next()); - int r = i.next(); - */ - int r = Integer.MIN_VALUE; - - int s; - while (i.hasNext()) { - s = i.next(); - if (s > r) r = s; - } - return r; - } - - private static int min(Collection a) { - if (a == null || a.isEmpty()) return Integer.MAX_VALUE; - Iterator i = a.iterator(); - /* concurrency issue (see max()) - if (a.size() == 1) return i.next(); - if (a.size() == 2) return Math.min(i.next(), i.next()); - int r = i.next(); - */ - int r = Integer.MAX_VALUE; - int s; - while (i.hasNext()) { - s = i.next(); - if (s < r) r = s; - } - return r; - } - - /** - * max position of search query words for multi word queries - * @return - */ - @Override - public int maxposition() { - return max(positions()); - } - - 
/** - * min word position of search query words for multi word queries - * @return - */ - @Override - public int minposition() { - return min(positions()); - } - /** * The average distance (in words) between search query terms for multi word searches. * @return word distance */ @Override public int distance() { - if (positions().size() < 2) return 0; + // check if positions have been joined + if (positions() == null || positions().isEmpty()) return 0; int d = 0; Iterator i = positions().iterator(); - // int s0 = i.next(), s1; // concurrency issue see max() - int s0 = -1, s1; + int s0 = posintext(); // init with own position + int s1; while (i.hasNext()) { s1 = i.next(); if (s0 > 0) d += Math.abs(s0 - s1); @@ -130,11 +52,8 @@ public abstract class AbstractReference implements Reference { // despite first line checks for size < 2 Arithmetic exception div by zero occured (1.91/9278 2016-10-19) // added d == 0 condition as protection for this (which was in all above tests the case) try { - return d == 0 ? 0 : d / (positions().size() - 1); + return d == 0 ? 
0 : d / positions().size(); } catch (ArithmeticException ex) { - // TODO: in peer to peer normalization of rwi queue modifies concurrently positions resulting in div by 0 exception - // with effect of above check position() < 2 is false but now true, it also results in changing ranking results until normalization is finished - // see related/causing code ReferenceOrder.normalizewith() and WordReferenceVars.max()/WordReferenceVars.min() -> refacturing of posintext, distance, min, max needed ConcurrentLog.fine("AbstractReference", "word distance calculation:" + ex.getMessage()); return 0; } diff --git a/source/net/yacy/kelondro/rwi/Reference.java b/source/net/yacy/kelondro/rwi/Reference.java index f4621bf86..66e01406a 100644 --- a/source/net/yacy/kelondro/rwi/Reference.java +++ b/source/net/yacy/kelondro/rwi/Reference.java @@ -58,29 +58,24 @@ public interface Reference { public void join(final Reference oe); /** - * Positions or search query words for the referenced result url - * This is only valid for multi word search queries. - * The positions contain the first word position for every search query word - * which has been joined (by join() ) - * @return list with word position + * First position of word in text. + * Word position is not calculated for titles, for all title words it defaults to 0. + * @return min word position starting at 1 (0 if undefined) */ - public Collection positions(); + public int posintext(); /** - * max position of search query words (for multi word queries) - * @return - */ - public int maxposition(); - - /** - * min word position of search query words (for multi word queries) - * @return + * Positions of search query words for the referenced result url + * This is only valid for multi word search queries. + * The positions contain the first word position for every joined search query word + * which has been joined (by join() ) + * @return list with word position (excl. 
the posintext of this reference) or null */ - public int minposition(); + public Collection positions(); /** * The average distance (in words) between search query terms for multi word searches. - * The distance is calculated from positions() + * The distance is calculated from posintext() and positions() * @return word distance */ public int distance(); diff --git a/source/net/yacy/peers/graphics/WebStructureGraph.java b/source/net/yacy/peers/graphics/WebStructureGraph.java index 5c3bea554..ddce09833 100644 --- a/source/net/yacy/peers/graphics/WebStructureGraph.java +++ b/source/net/yacy/peers/graphics/WebStructureGraph.java @@ -30,7 +30,6 @@ package net.yacy.peers.graphics; import java.io.File; import java.io.Serializable; import java.text.ParseException; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; @@ -451,7 +450,12 @@ public class WebStructureGraph { @Override public Collection positions() { - return new ArrayList(0); + return null; + } + + @Override + public int posintext() { + throw new UnsupportedOperationException(); } } diff --git a/source/net/yacy/search/ranking/ReferenceOrder.java b/source/net/yacy/search/ranking/ReferenceOrder.java index 3663e5dfd..3fc812b5b 100644 --- a/source/net/yacy/search/ranking/ReferenceOrder.java +++ b/source/net/yacy/search/ranking/ReferenceOrder.java @@ -221,7 +221,6 @@ public class ReferenceOrder { * @return a ranking: the higher the number, the better is the ranking */ public long cardinal(final WordReference t) { - //return Long.MAX_VALUE - preRanking(ranking, iEntry, this.entryMin, this.entryMax, this.searchWords); // the normalizedEntry must be a normalized indexEntry assert this.min != null; assert this.max != null; @@ -230,17 +229,15 @@ public class ReferenceOrder { final Bitfield flags = t.flags(); final long tf = ((this.max.termFrequency() == this.min.termFrequency()) ? 
0 : (((int)(((t.termFrequency()-this.min.termFrequency())*256.0)/(this.max.termFrequency() - this.min.termFrequency())))) << this.ranking.coeff_termfrequency); //System.out.println("tf(" + t.urlHash + ") = " + Math.floor(1000 * t.termFrequency()) + ", min = " + Math.floor(1000 * min.termFrequency()) + ", max = " + Math.floor(1000 * max.termFrequency()) + ", tf-normed = " + tf); - final int maxmaxpos = this.max.maxposition(); // returns Integer.MIN_VALUE if positions empty - final int minminpos = this.min.minposition(); final long r = ((256 - DigestURL.domLengthNormalized(t.urlhash())) << this.ranking.coeff_domlength) - + ((this.max.urlcomps() == this.min.urlcomps() ) ? 0 : (256 - (((t.urlcomps() - this.min.urlcomps() ) << 8) / (this.max.urlcomps() - this.min.urlcomps()) )) << this.ranking.coeff_urlcomps) - + ((this.max.urllength() == this.min.urllength() ) ? 0 : (256 - (((t.urllength() - this.min.urllength() ) << 8) / (this.max.urllength() - this.min.urllength()) )) << this.ranking.coeff_urllength) - + ((maxmaxpos == minminpos || maxmaxpos < 0) ? 0 : (256 - (((t.minposition() - minminpos) << 8) / (maxmaxpos - minminpos))) << this.ranking.coeff_posintext) - + ((this.max.posofphrase() == this.min.posofphrase()) ? 0 : (256 - (((t.posofphrase() - this.min.posofphrase() ) << 8) / (this.max.posofphrase() - this.min.posofphrase()) )) << this.ranking.coeff_posofphrase) - + ((this.max.posinphrase() == this.min.posinphrase()) ? 0 : (256 - (((t.posinphrase() - this.min.posinphrase() ) << 8) / (this.max.posinphrase() - this.min.posinphrase()) )) << this.ranking.coeff_posinphrase) - + ((this.max.distance() == this.min.distance() ) ? 0 : (256 - (((t.distance() - this.min.distance() ) << 8) / (this.max.distance() - this.min.distance()) )) << this.ranking.coeff_worddistance) - + ((this.max.virtualAge() == this.min.virtualAge()) ? 
0 : (((t.virtualAge() - this.min.virtualAge() ) << 8) / (this.max.virtualAge() - this.min.virtualAge()) ) << this.ranking.coeff_date) + + ((this.max.urlcomps() == this.min.urlcomps() ) ? 0 : (256 - (((t.urlcomps() - this.min.urlcomps() ) << 8) / (this.max.urlcomps() - this.min.urlcomps()) )) << this.ranking.coeff_urlcomps) + + ((this.max.urllength() == this.min.urllength() ) ? 0 : (256 - (((t.urllength() - this.min.urllength() ) << 8) / (this.max.urllength() - this.min.urllength()) )) << this.ranking.coeff_urllength) + + ((this.max.posintext() == this.min.posintext()) ? 0 : (256 - (((t.posintext() - this.min.posintext() ) << 8) / (this.max.posintext() - this.min.posintext()) )) << this.ranking.coeff_posintext) + + ((this.max.posofphrase() == this.min.posofphrase()) ? 0 : (256 - (((t.posofphrase() - this.min.posofphrase() ) << 8) / (this.max.posofphrase() - this.min.posofphrase()) )) << this.ranking.coeff_posofphrase) + + ((this.max.posinphrase() == this.min.posinphrase()) ? 0 : (256 - (((t.posinphrase() - this.min.posinphrase() ) << 8) / (this.max.posinphrase() - this.min.posinphrase()) )) << this.ranking.coeff_posinphrase) + + ((this.max.distance() == this.min.distance() ) ? 0 : (256 - (((t.distance() - this.min.distance() ) << 8) / (this.max.distance() - this.min.distance()) )) << this.ranking.coeff_worddistance) + + ((this.max.virtualAge() == this.min.virtualAge()) ? 0 : (((t.virtualAge() - this.min.virtualAge() ) << 8) / (this.max.virtualAge() - this.min.virtualAge()) ) << this.ranking.coeff_date) + ((this.max.wordsintitle() == this.min.wordsintitle()) ? 0 : (((t.wordsintitle() - this.min.wordsintitle() ) << 8) / (this.max.wordsintitle() - this.min.wordsintitle()) ) << this.ranking.coeff_wordsintitle) + ((this.max.wordsintext() == this.min.wordsintext()) ? 0 : (((t.wordsintext() - this.min.wordsintext() ) << 8) / (this.max.wordsintext() - this.min.wordsintext()) ) << this.ranking.coeff_wordsintext) + ((this.max.phrasesintext() == this.min.phrasesintext()) ? 
0 : (((t.phrasesintext()- this.min.phrasesintext() ) << 8) / (this.max.phrasesintext()- this.min.phrasesintext()) ) << this.ranking.coeff_phrasesintext) diff --git a/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java b/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java index e04f30b0f..ebc04c1cf 100644 --- a/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java +++ b/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java @@ -57,36 +57,39 @@ public class WordReferenceVarsTest { ientry.setWord(word); WordReferenceVars wvMin = new WordReferenceVars(ientry, true); - wvMin.addPosition(10); // add position for distance testing - - WordReferenceVars wvMax = wvMin.clone(); - // create a other reference - WordReferenceVars wvOther = new WordReferenceVars(ientry, true); + WordReferenceVars wvOther = wvMin.clone(); + + word.posInText = maxposintext; + ientry.setWord(word); + WordReferenceVars wvMax = new WordReferenceVars(ientry, true); + + wvMin.addPosition(10); // add position for distance testing + wvMax.addPosition(maxposintext); // add position for distance testing wvOther.addPosition(maxposintext); // add position (max) for distance testing // test min for posintext and distance wvMin.min(wvOther); - assertEquals("min posintext", minposintext, wvMin.minposition()); + assertEquals("min posintext", minposintext, wvMin.posintext()); assertEquals("min distance", 5, wvMin.distance()); wvMin.min(wvOther); // test repeated call doesn't change result - assertEquals("min posintext (repeat)", minposintext, wvMin.minposition()); + assertEquals("min posintext (repeat)", minposintext, wvMin.posintext()); assertEquals("min distance (repeat)", 5, wvMin.distance()); // test max for posintext and distance wvMax.max(wvOther); - assertEquals("max posintext", maxposintext, wvMax.maxposition()); + assertEquals("max posintext", maxposintext, wvMax.posintext()); assertEquals("max distance", maxposintext - minposintext, wvMax.distance()); 
wvMax.max(wvOther); // test repeated calls don't change result wvMax.max(wvOther); - assertEquals("max posintext (repeat)", maxposintext, wvMax.maxposition()); + assertEquals("max posintext (repeat)", maxposintext, wvMax.posintext()); assertEquals("max distance (repeat)", maxposintext - minposintext, wvMax.distance()); // reverse test wvOther.max(wvMax); - assertEquals("max posintext (reverse)", maxposintext, wvOther.maxposition()); + assertEquals("max posintext (reverse)", maxposintext, wvOther.posintext()); assertEquals("max distance (repeat)", maxposintext - minposintext, wvOther.distance()); } diff --git a/test/java/net/yacy/search/index/SegmentTest.java b/test/java/net/yacy/search/index/SegmentTest.java index 9ebea042b..3614879fa 100644 --- a/test/java/net/yacy/search/index/SegmentTest.java +++ b/test/java/net/yacy/search/index/SegmentTest.java @@ -155,7 +155,7 @@ public class SegmentTest { // creates one test url with this text in the rwi index DigestURL url = new DigestURL("http://test.org/test.html"); - storeTestDocTextToTermIndex(url, "One Two Three Four Five. This is a test text. One two three for five."); + storeTestDocTextToTermIndex(url, "One Two Three Four Five. This is a test text. 
One two three for five"); // posintext 1 2 3 4 5 6 7 8 9 // hitcount ("five") 1 1 2 // posofphrase |-------100------------| |------101---------| |--------102----------| @@ -171,7 +171,7 @@ public class SegmentTest { // do the search TermSearch result = index.termIndex.query(queryHashes, excludeHashes, urlselection, termFactory, Integer.MAX_VALUE); - // get the joined resutls + // get the joined results ReferenceContainer wc = result.joined(); // we should have now one result (stored to index above) @@ -181,7 +181,7 @@ public class SegmentTest { WordReference r = wc.getReference(url.hash()); // min position of search word in text (posintext) - assertEquals("minposition('five')", 5, r.minposition()); + assertEquals("min posintext('five')", 5, r.posintext()); // occurence of search words in text assertEquals("hitcount('five')", 2, r.hitcount()); @@ -190,15 +190,6 @@ public class SegmentTest { assertEquals("posofphrase", 100, r.posofphrase()); assertEquals("posinphrase", 5, r.posinphrase()); - // currently the results are not as expected for a multi-word query - // (reason: Reference container is backed by ReferenceRow (which doen't hold positions of joined references) ergo can't return related results - System.out.println("-----------------"); - System.out.println("positions=" + r.positions() + " (expected=5,8)"); - // max position of search word in text - System.out.println("maxposition=" + r.maxposition() + " (expected=8)"); - // for a multiword query distance expected to be the avg of search word positions in text - System.out.println("distance=" + r.distance() + " (expected=3)"); - System.out.println("-----------------"); } }