diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index 364576bce..54796ee70 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -336,7 +336,24 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (virtualAge() > (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v; - if (other.positions != null) a(this.positions, min(this.positions, other.positions)); + + int minpos = min(this.positions, other.positions); + if (minpos != Integer.MAX_VALUE) { + int odist = other.distance(); + int dist = this.distance(); + this.positions.clear(); // keep only the minimum; a second position is re-added below to carry the distance + this.positions.add(minpos); + // handle distance for multi-word queries: the smaller positive distance of the two references wins + // distance is calculated from positions, so at least 2 positions are needed for the calculation; NOTE(review): the replaced line was guarded by other.positions != null, confirm that other.distance() above tolerates a null positions collection + if (odist > 0 && odist < dist) { + this.positions.add(minpos + odist); + } else if (dist > 0) { + this.positions.add(minpos + dist); + } else if (odist > 0) { + this.positions.add(minpos + odist); + } + } + if (this.posinphrase > (v = other.posinphrase)) this.posinphrase = v; if (this.posofphrase > (v = other.posofphrase)) this.posofphrase = v; if (this.lastModified > (w = other.lastModified)) this.lastModified = w; @@ -358,7 +375,22 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (virtualAge() < (v = other.virtualAge())) this.virtualAge = v; if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v; if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v; - if (other.positions != null) a(this.positions, max(this.positions, other.positions)); + + int maxpos = max(this.positions, other.positions); + if (maxpos != Integer.MIN_VALUE) { + int odist = 
other.distance(); + int dist = this.distance(); + this.positions.clear(); + this.positions.add(maxpos); + // handle distance for multi-word queries: the larger distance of the two references wins + // distance is calculated from positions, so at least 2 positions are needed for the calculation + if (odist > dist) { + this.positions.add(maxpos - odist); // special case for max: the helper position is placed before maxpos so that it does not alter the maximum + } else if (dist > 0) { + this.positions.add(maxpos - dist); + } + } + if (this.posinphrase < (v = other.posinphrase)) this.posinphrase = v; if (this.posofphrase < (v = other.posofphrase)) this.posofphrase = v; if (this.lastModified < (w = other.lastModified)) this.lastModified = w; diff --git a/source/net/yacy/kelondro/rwi/AbstractReference.java b/source/net/yacy/kelondro/rwi/AbstractReference.java index fcf376753..c2175becf 100644 --- a/source/net/yacy/kelondro/rwi/AbstractReference.java +++ b/source/net/yacy/kelondro/rwi/AbstractReference.java @@ -32,13 +32,6 @@ import java.util.Iterator; public abstract class AbstractReference implements Reference { - protected static void a(Collection a, int i) { - assert a != null; - if (i == Integer.MAX_VALUE || i == Integer.MIN_VALUE) return; // signal for 'do nothing' - a.clear(); - a.add(i); - } - protected static int max(Collection a, Collection b) { if (a == null || a.isEmpty()) return max(b); if (b == null || b.isEmpty()) return max(a); diff --git a/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java b/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java new file mode 100644 index 000000000..e04f30b0f --- /dev/null +++ b/test/java/net/yacy/kelondro/data/word/WordReferenceVarsTest.java @@ -0,0 +1,94 @@ +/** + * WordReferenceVarsTest + * part of YaCy + * Copyright 2016 by reger24; https://github.com/reger24 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 
of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ +package net.yacy.kelondro.data.word; + +import java.net.MalformedURLException; +import net.yacy.cora.document.encoding.UTF8; +import net.yacy.cora.document.id.DigestURL; +import net.yacy.crawler.retrieval.Response; +import net.yacy.kelondro.util.Bitfield; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * Unit tests for WordReferenceVars class. + */ +public class WordReferenceVarsTest { + + /** + * Test of the min and max methods of class WordReferenceVars (resulting position and distance). + * + * @author reger24 + */ + @Test + public void testMin() throws MalformedURLException { + + // testing posintext and distance calculation + int minposintext = 5; // minposintext for test + int maxposintext = 30; // maxposintext for test + + DigestURL url = new DigestURL("http://test.org/test.html"); + // create a WordReference template with posintext = minposintext = 5 + final WordReferenceRow ientry = new WordReferenceRow( + url.hash(), 20, 3, 2, + 1, 1, + System.currentTimeMillis(), System.currentTimeMillis(), + UTF8.getBytes("en"), Response.DT_TEXT, + 0, 0); + Word word = new Word(minposintext, 1, 100); + word.flags = new Bitfield(4); + ientry.setWord(word); + + WordReferenceVars wvMin = new WordReferenceVars(ientry, true); + wvMin.addPosition(10); // add position for distance testing + + WordReferenceVars wvMax = wvMin.clone(); + + // create another reference + WordReferenceVars wvOther = new WordReferenceVars(ientry, true); + wvOther.addPosition(maxposintext); // add position (max) for distance testing + + // test min for posintext 
and distance + wvMin.min(wvOther); + assertEquals("min posintext", minposintext, wvMin.minposition()); + assertEquals("min distance", 5, wvMin.distance()); + + wvMin.min(wvOther); // test that a repeated call doesn't change the result + assertEquals("min posintext (repeat)", minposintext, wvMin.minposition()); + assertEquals("min distance (repeat)", 5, wvMin.distance()); + + // test max for posintext and distance + wvMax.max(wvOther); + assertEquals("max posintext", maxposintext, wvMax.maxposition()); + assertEquals("max distance", maxposintext - minposintext, wvMax.distance()); + + wvMax.max(wvOther); // test that repeated calls don't change the result + wvMax.max(wvOther); + assertEquals("max posintext (repeat)", maxposintext, wvMax.maxposition()); + assertEquals("max distance (repeat)", maxposintext - minposintext, wvMax.distance()); + + // reverse test: merge the already maximized reference into the other one + wvOther.max(wvMax); + assertEquals("max posintext (reverse)", maxposintext, wvOther.maxposition()); + assertEquals("max distance (reverse)", maxposintext - minposintext, wvOther.distance()); + + } + +}