@ -62,11 +62,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private final char type;
private int hitcount,     // how often this word appears in the text
            llocal, lother, phrasesintext,
            posintext,    // word position in text
            posinphrase, posofphrase,
            urlcomps, urllength,
            wordsintext, wordsintitle;
private int virtualAge;
// Word positions of joined references. Deliberately not final: the field is
// set to null when there are no positions and is created or replaced later
// (see the merge and addPosition logic), so readers must expect null here.
private Queue<Integer> positions;
private double termFrequency;
private final boolean local;
@ -78,6 +79,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int hitcount , // how often appears this word in the text
final int hitcount , // how often appears this word in the text
final int wordcount , // total number of words
final int wordcount , // total number of words
final int phrasecount , // total number of phrases
final int phrasecount , // total number of phrases
final int posintext , // first position of word in text
final Queue < Integer > ps , // positions of words that are joined into the reference
final Queue < Integer > ps , // positions of words that are joined into the reference
final int posinphrase , // position of word in its phrase
final int posinphrase , // position of word in its phrase
final int posofphrase , // number of the phrase where word appears
final int posofphrase , // number of the phrase where word appears
@ -100,9 +102,15 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this . llocal = outlinksSame ;
this . llocal = outlinksSame ;
this . lother = outlinksOther ;
this . lother = outlinksOther ;
this . phrasesintext = phrasecount ;
this . phrasesintext = phrasecount ;
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
if ( ! ps . isEmpty ( ) ) for ( final Integer i : ps ) this . positions . add ( i ) ;
if ( ps ! = null & & ! ps . isEmpty ( ) ) {
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
for ( final Integer i : ps ) this . positions . add ( i ) ;
} else {
this . positions = null ;
}
this . posinphrase = posinphrase ;
this . posinphrase = posinphrase ;
this . posintext = posintext ;
this . posofphrase = posofphrase ;
this . posofphrase = posofphrase ;
this . urlcomps = urlComps ;
this . urlcomps = urlComps ;
this . urllength = urlLength ;
this . urllength = urlLength ;
@ -124,9 +132,15 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this . llocal = e . llocal ( ) ;
this . llocal = e . llocal ( ) ;
this . lother = e . lother ( ) ;
this . lother = e . lother ( ) ;
this . phrasesintext = e . phrasesintext ( ) ;
this . phrasesintext = e . phrasesintext ( ) ;
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
if ( ! e . positions ( ) . isEmpty ( ) ) for ( final Integer i : e . positions ( ) ) this . positions . add ( i ) ;
if ( e . positions ( ) ! = null & & ! e . positions ( ) . isEmpty ( ) ) {
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
for ( final Integer i : e . positions ( ) ) this . positions . add ( i ) ;
} else {
this . positions = null ;
}
this . posinphrase = e . posinphrase ( ) ;
this . posinphrase = e . posinphrase ( ) ;
this . posintext = e . posintext ( ) ;
this . posofphrase = e . posofphrase ( ) ;
this . posofphrase = e . posofphrase ( ) ;
this . urlcomps = e . urlcomps ( ) ;
this . urlcomps = e . urlcomps ( ) ;
this . urllength = e . urllength ( ) ;
this . urllength = e . urllength ( ) ;
@ -152,6 +166,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this . phrasesintext = 0 ;
this . phrasesintext = 0 ;
this . positions = null ;
this . positions = null ;
this . posinphrase = 0 ;
this . posinphrase = 0 ;
this . posintext = 0 ;
this . posofphrase = 0 ;
this . posofphrase = 0 ;
this . urlcomps = 0 ;
this . urlcomps = 0 ;
this . urllength = 0 ;
this . urllength = 0 ;
@ -172,6 +187,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this . hitcount ,
this . hitcount ,
this . wordsintext ,
this . wordsintext ,
this . phrasesintext ,
this . phrasesintext ,
this . posintext ,
this . positions ,
this . positions ,
this . posinphrase ,
this . posinphrase ,
this . posofphrase ,
this . posofphrase ,
@ -234,6 +250,20 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return this . posinphrase ;
return this . posinphrase ;
}
}
/ * *
* First word position in text .
* @return min position
* /
@Override
public int posintext ( ) {
return this . posintext ;
}
/ * *
* Word positions for joined references ( for multi word queries ) .
* @see #posintext()
* @return the word positions of the joined references
* /
@Override
@Override
public Collection < Integer > positions ( ) {
public Collection < Integer > positions ( ) {
return this . positions ;
return this . positions ;
@ -253,7 +283,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this . hitcount , // how often appears this word in the text
this . hitcount , // how often appears this word in the text
this . wordsintext , // total number of words
this . wordsintext , // total number of words
this . phrasesintext , // total number of phrases
this . phrasesintext , // total number of phrases
this . posi tions. isEmpty ( ) ? 0 : minposition ( ) , // position of word in all words (WordReferenceRow stores first position in text , minpos also important for joined references )
this . posi ntext, // position of word in all words (WordReferenceRow stores first position in text )
this . posinphrase , // position of word in its phrase
this . posinphrase , // position of word in its phrase
this . posofphrase , // number of the phrase where word appears
this . posofphrase , // number of the phrase where word appears
this . lastModified , // last-modified time of the document where word appears
this . lastModified , // last-modified time of the document where word appears
@ -336,21 +366,19 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if ( virtualAge ( ) > ( v = other . virtualAge ( ) ) ) this . virtualAge = v ;
if ( virtualAge ( ) > ( v = other . virtualAge ( ) ) ) this . virtualAge = v ;
if ( this . wordsintext > ( v = other . wordsintext ) ) this . wordsintext = v ;
if ( this . wordsintext > ( v = other . wordsintext ) ) this . wordsintext = v ;
if ( this . phrasesintext > ( v = other . phrasesintext ) ) this . phrasesintext = v ;
if ( this . phrasesintext > ( v = other . phrasesintext ) ) this . phrasesintext = v ;
if ( this . posintext > ( v = other . posintext ) ) this . posintext = v ;
int minpos = min ( this . positions , other . positions ) ;
// calculate and remember min distance
if ( minpos ! = Integer . MAX_VALUE ) {
if ( this . positions ! = null | | other . positions ! = null ) {
int odist = other . distance ( ) ;
int odist = other . distance ( ) ;
int dist = this . distance ( ) ;
int dist = this . distance ( ) ;
this . positions . clear ( ) ; // we want only the min
this . positions . add ( minpos ) ;
// handle distance for multi word queries
// distance is calculated from positions, must be at least 2 positions for calculation
if ( odist > 0 & & odist < dist ) {
if ( odist > 0 & & odist < dist ) {
this . positions . add ( minpos + odist ) ;
if ( this . positions = = null ) {
} else if ( dist > 0 ) {
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
this . positions . add ( minpos + dist ) ;
} else {
} else if ( odist > 0 ) {
this . positions . clear ( ) ;
this . positions . add ( minpos + odist ) ;
}
this . positions . add ( this . posintext + odist ) ;
}
}
}
}
@ -375,19 +403,19 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if ( virtualAge ( ) < ( v = other . virtualAge ( ) ) ) this . virtualAge = v ;
if ( virtualAge ( ) < ( v = other . virtualAge ( ) ) ) this . virtualAge = v ;
if ( this . wordsintext < ( v = other . wordsintext ) ) this . wordsintext = v ;
if ( this . wordsintext < ( v = other . wordsintext ) ) this . wordsintext = v ;
if ( this . phrasesintext < ( v = other . phrasesintext ) ) this . phrasesintext = v ;
if ( this . phrasesintext < ( v = other . phrasesintext ) ) this . phrasesintext = v ;
if ( this . posintext < ( v = other . posintext ) ) this . posintext = v ;
int maxpos = max ( this . positions , other . positions ) ;
// calculate and remember max distance
if ( maxpos ! = Integer . MIN_VALUE ) {
if ( this . positions ! = null | | other . positions ! = null ) {
int odist = other . distance ( ) ;
int odist = other . distance ( ) ;
int dist = this . distance ( ) ;
int dist = this . distance ( ) ;
this . positions . clear ( ) ;
if ( odist > 0 & & odist > dist ) {
this . positions . add ( maxpos ) ;
if ( this . positions = = null ) {
// handle distance for multi word queries
this . positions = new LinkedBlockingQueue < Integer > ( ) ;
// distance is calculated from positions, must be at least 2 positions for calculation
} else {
if ( odist > dist ) {
this . positions . clear ( ) ;
this . positions . add ( maxpos - odist ) ; // special cas for max, to not be altered by the pos for distance use pos before maxpos
}
} else if ( dist > 0 ) {
this . positions . add ( this . posintext + odist ) ;
this . positions . add ( maxpos - dist ) ;
}
}
}
}
@ -404,18 +432,27 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
/ * *
/ * *
* joins two entries into one entry
* joins two entries into one entry
*
*
* Main usage is on multi-word searches to combine the position values for distance ranking.
* Main usage is on multi-word searches to combine the position values for ranking and word distance calculation.
* A Join is valid for the same url .
* A Join is valid for the same url .
* @param r WordReference
* @param r WordReference
* /
* /
@Override
@Override
public void join ( final Reference r ) {
public void join ( final Reference r ) {
// combine the distance
final WordReference oe = ( WordReference ) r ;
final WordReference oe = ( WordReference ) r ;
this . positions . addAll ( oe . positions ( ) ) ;
// choose min posintext (for > 0)
if ( this . posintext > 0 & & oe . posintext ( ) > 0 ) {
if ( this . posintext > oe . posintext ( ) ) {
this . addPosition ( this . posintext ) ; // remember larger position (for distance calculation)
this . posintext = oe . posintext ( ) ;
} else {
this . addPosition ( oe . posintext ( ) ) ; // remember other position (for distance calculation)
}
} else if ( this . posintext = = 0 ) {
this . posintext = oe . posintext ( ) ;
}
// join phrase
// join phrase
// this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
// this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
// this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
// this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
@ -465,8 +502,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return o1 . compareTo ( o2 ) ;
return o1 . compareTo ( o2 ) ;
}
}
/ * *
* Add a position for word distance calculation to the list if position > 0
* @param position
* /
public void addPosition ( final int position ) {
public void addPosition ( final int position ) {
this . positions . add ( position ) ;
if ( this . positions = = null & & position > 0 ) this . positions = new LinkedBlockingQueue < Integer > ( ) ;
if ( position > 0 ) this . positions . add ( position ) ;
}
}
/ * *
/ * *
@ -474,7 +516,6 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
* @param container
* @param container
* @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned
* @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned
* /
* /
public static BlockingQueue < WordReferenceVars > transform ( final ReferenceContainer < WordReference > container , final long maxtime , final boolean local ) {
public static BlockingQueue < WordReferenceVars > transform ( final ReferenceContainer < WordReference > container , final long maxtime , final boolean local ) {
final LinkedBlockingQueue < WordReferenceVars > vars = new LinkedBlockingQueue < WordReferenceVars > ( ) ;
final LinkedBlockingQueue < WordReferenceVars > vars = new LinkedBlockingQueue < WordReferenceVars > ( ) ;
if ( container . size ( ) < = 100 ) {
if ( container . size ( ) < = 100 ) {