implemented RWI WordReference to return the word position value (was always left empty)

This is needed and enables existing word position ranking for RWI.
The concurrency issue that surfaced in the word position min/max calculation was eliminated
by an iterator.hasNext() check before each next() access.
pull/93/head
reger 9 years ago
parent 43ba23335b
commit 120bf7e6e2

@ -252,9 +252,16 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return (0xff & this.entry.getColByte(col_hitcount));
}
/**
 * First position of the word in the text.
 * The value is read from the position column of this entry and narrowed to an int.
 * @return Collection with exactly one element (the word position)
 */
@Override
public Collection<Integer> positions() {
    // The former unconditional "return new ArrayList<Integer>(0);" was removed:
    // it left the position lookup below unreachable and the result always empty.
    final int pos = (int) this.entry.getColLong(col_posintext);
    final ArrayList<Integer> arr = new ArrayList<Integer>(1);
    arr.add(pos);
    return arr;
}
@Override

@ -60,7 +60,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
public final byte[] urlHash;
private String hostHash = null;
private final char type;
private int hitcount, llocal, lother, phrasesintext,
private int hitcount, // how often appears this word in the text
llocal, lother, phrasesintext,
posinphrase, posofphrase,
urlcomps, urllength,
wordsintext, wordsintitle;
@ -210,6 +211,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return this.type;
}
/**
* Number of times this word appears in the text
* @return the hit count of this word
*/
@Override
public int hitcount() {
return this.hitcount;
@ -259,7 +264,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.hitcount, // how often appears this word in the text
this.wordsintext, // total number of words
this.phrasesintext, // total number of phrases
this.positions.isEmpty() ? 1 : this.positions.iterator().next(), // position of word in all words
// TODO: position 1 on empty positions may give high ranking scores for unknown pos (needs to be checked if 0 would be appropriate)
this.positions.isEmpty() ? -1 : this.positions.iterator().next(), // position of word in all words
this.posinphrase, // position of word in its phrase
this.posofphrase, // number of the phrase where word appears
this.lastModified, // last-modified time of the document where word appears

@ -63,9 +63,17 @@ public abstract class AbstractReference implements Reference {
private static int max(Collection<Integer> a) {
if (a == null || a.isEmpty()) return Integer.MIN_VALUE;
Iterator<Integer> i = a.iterator();
/*
experienced a concurrency issue with this shortcut 2016-09-06
on i.next() without a preceding hasNext() test
java.util.NoSuchElementException at java.util.concurrent.LinkedBlockingQueue$Itr.next(LinkedBlockingQueue.java:828)
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.max(i.next(), i.next());
int r = i.next();
*/
int r = Integer.MIN_VALUE;
int s;
while (i.hasNext()) {
s = i.next();
@ -77,9 +85,12 @@ public abstract class AbstractReference implements Reference {
private static int min(Collection<Integer> a) {
if (a == null || a.isEmpty()) return Integer.MAX_VALUE;
Iterator<Integer> i = a.iterator();
/* concurrency issue (see max())
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.min(i.next(), i.next());
int r = i.next();
*/
int r = Integer.MAX_VALUE;
int s;
while (i.hasNext()) {
s = i.next();
@ -103,10 +114,11 @@ public abstract class AbstractReference implements Reference {
if (positions().size() < 2) return 0;
int d = 0;
Iterator<Integer> i = positions().iterator();
int s0 = i.next(), s1;
// int s0 = i.next(), s1; // concurrency issue see max()
int s0 = -1, s1;
while (i.hasNext()) {
s1 = i.next();
d += Math.abs(s0 - s1);
if (s0 > 0) d += Math.abs(s0 - s1);
s0 = s1;
}
return d / (positions().size() - 1);

Loading…
Cancel
Save