diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index 5575c06d7..7ec32879c 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -252,9 +252,16 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return (0xff & this.entry.getColByte(col_hitcount)); } + /** + * First position of word in text + * @return Collection with one element + */ @Override public Collection positions() { - return new ArrayList(0); + int pos = (int) this.entry.getColLong(col_posintext); + ArrayList arr = new ArrayList(1); + arr.add(pos); + return arr; } @Override diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index af30c4db7..1cec8ad4e 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -60,10 +60,11 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc public final byte[] urlHash; private String hostHash = null; private final char type; - private int hitcount, llocal, lother, phrasesintext, - posinphrase, posofphrase, - urlcomps, urllength, - wordsintext, wordsintitle; + private int hitcount, // how often appears this word in the text + llocal, lother, phrasesintext, + posinphrase, posofphrase, + urlcomps, urllength, + wordsintext, wordsintitle; private int virtualAge; private final Queue positions; private double termFrequency; @@ -210,6 +211,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return this.type; } + /** + * How often appears this word in the text + * @return + */ @Override public int hitcount() { return this.hitcount; @@ -259,7 +264,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.hitcount, // how often appears this word in the text this.wordsintext, // total number of words this.phrasesintext, // total number of phrases - this.positions.isEmpty() ? 1 : this.positions.iterator().next(), // position of word in all words + + // TODO: positon 1 on empty positions may give high ranking scores for unknown pos (needs to be checked if 0 would be appropriate) + this.positions.isEmpty() ? -1 : this.positions.iterator().next(), // position of word in all words this.posinphrase, // position of word in its phrase this.posofphrase, // number of the phrase where word appears this.lastModified, // last-modified time of the document where word appears diff --git a/source/net/yacy/kelondro/rwi/AbstractReference.java b/source/net/yacy/kelondro/rwi/AbstractReference.java index 52c3193b0..e1097da85 100644 --- a/source/net/yacy/kelondro/rwi/AbstractReference.java +++ b/source/net/yacy/kelondro/rwi/AbstractReference.java @@ -63,9 +63,17 @@ public abstract class AbstractReference implements Reference { private static int max(Collection a) { if (a == null || a.isEmpty()) return Integer.MIN_VALUE; Iterator i = a.iterator(); + /* + expirienced concurrency issue with this short cut 2016-09-06 + on i.next w/o test of hasNext before + java.util.NoSuchElementException at java.util.concurrent.LinkedBlockingQueue$Itr.next(LinkedBlockingQueue.java:828) + if (a.size() == 1) return i.next(); if (a.size() == 2) return Math.max(i.next(), i.next()); int r = i.next(); + */ + int r = Integer.MIN_VALUE; + int s; while (i.hasNext()) { s = i.next(); @@ -77,9 +85,12 @@ public abstract class AbstractReference implements Reference { private static int min(Collection a) { if (a == null || a.isEmpty()) return Integer.MAX_VALUE; Iterator i = a.iterator(); + /* concurrency issue (see max()) if (a.size() == 1) return i.next(); if (a.size() == 2) return Math.min(i.next(), i.next()); int r = i.next(); + */ + int r = Integer.MAX_VALUE; int s; while (i.hasNext()) { s = i.next(); @@ -103,10 +114,11 @@ public abstract class AbstractReference implements Reference { if (positions().size() < 2) return 0; int d = 0; Iterator i = positions().iterator(); - int s0 = i.next(), s1; + // int s0 = i.next(), s1; // concurrency issue see max() + int s0 = -1, s1; while (i.hasNext()) { s1 = i.next(); - d += Math.abs(s0 - s1); + if (s0 > 0) d += Math.abs(s0 - s1); s0 = s1; } return d / (positions().size() - 1);