@ -351,7 +351,8 @@ public class Segment {
final Document document ,
final Document document ,
final Condenser condenser ,
final Condenser condenser ,
final SearchEvent searchEvent ,
final SearchEvent searchEvent ,
final String sourceName
final String sourceName ,
final boolean storeToRWI
) {
) {
final long startTime = System . currentTimeMillis ( ) ;
final long startTime = System . currentTimeMillis ( ) ;
@ -411,59 +412,61 @@ public class Segment {
final int urlComps = MultiProtocolURI . urlComps ( url . toString ( ) ) . length ;
final int urlComps = MultiProtocolURI . urlComps ( url . toString ( ) ) . length ;
// create a word prototype which is re-used for all entries
// create a word prototype which is re-used for all entries
final int len = ( document = = null ) ? urlLength : document . dc_title ( ) . length ( ) ;
if ( ( this . termIndex ! = null & & storeToRWI ) | | searchEvent ! = null ) {
final WordReferenceRow ientry = new WordReferenceRow (
final int len = ( document = = null ) ? urlLength : document . dc_title ( ) . length ( ) ;
url . hash ( ) ,
final WordReferenceRow ientry = new WordReferenceRow (
urlLength , urlComps , len ,
url . hash ( ) ,
condenser . RESULT_NUMB_WORDS ,
urlLength , urlComps , len ,
condenser . RESULT_NUMB_SENTENCES ,
condenser . RESULT_NUMB_WORDS ,
modDate . getTime ( ) ,
condenser . RESULT_NUMB_SENTENCES ,
System . currentTimeMillis ( ) ,
modDate . getTime ( ) ,
UTF8 . getBytes ( language ) ,
System . currentTimeMillis ( ) ,
docType ,
UTF8 . getBytes ( language ) ,
outlinksSame , outlinksOther ) ;
docType ,
outlinksSame , outlinksOther ) ;
// iterate over all words of content text
Word wprop = null ;
// iterate over all words of content text
byte [ ] wordhash ;
Word wprop = null ;
String word ;
byte [ ] wordhash ;
for ( Map . Entry < String , Word > wentry : condenser . words ( ) . entrySet ( ) ) {
String word ;
word = wentry . getKey ( ) ;
for ( Map . Entry < String , Word > wentry : condenser . words ( ) . entrySet ( ) ) {
wprop = wentry . getValue ( ) ;
word = wentry . getKey ( ) ;
assert ( wprop . flags ! = null ) ;
wprop = wentry . getValue ( ) ;
ientry . setWord ( wprop ) ;
assert ( wprop . flags ! = null ) ;
wordhash = Word . word2hash ( word ) ;
ientry . setWord ( wprop ) ;
wordhash = Word . word2hash ( word ) ;
if ( this . termIndex ! = null & & storeToRWI ) try {
this . termIndex . add ( wordhash , ientry ) ;
} catch ( final Exception e ) {
Log . logException ( e ) ;
}
wordCount + + ;
// During a search event a heuristic may be used which acquires index data at search time.
// To transfer that indexed data directly to the search process, the following lines
// additionally push the index entry into the search process.
// This is done only for searched words (query-included and not query-excluded hashes).
if ( searchEvent ! = null & & ! searchEvent . getQuery ( ) . query_exclude_hashes . has ( wordhash ) & & searchEvent . getQuery ( ) . query_include_hashes . has ( wordhash ) ) {
// if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
ReferenceContainer < WordReference > container ;
try {
container = ReferenceContainer . emptyContainer ( Segment . wordReferenceFactory , wordhash , 1 ) ;
container . add ( ientry ) ;
rankingProcess . add ( container , true , sourceName , - 1 , 5000 ) ;
} catch ( final SpaceExceededException e ) {
continue ;
}
}
}
if ( rankingProcess ! = null ) rankingProcess . addFinalize ( ) ;
// assign the catchall word
ientry . setWord ( wprop = = null ? catchallWord : wprop ) ; // we use one of the word properties as template to get the document characteristics
if ( this . termIndex ! = null ) try {
if ( this . termIndex ! = null ) try {
this . termIndex . add ( wordhash , ientry ) ;
this . termIndex . add ( catchallH ash, ientry ) ;
} catch ( final Exception e ) {
} catch ( final Exception e ) {
Log . logException ( e ) ;
Log . logException ( e ) ;
}
}
wordCount + + ;
// During a search event a heuristic may be used which acquires index data at search time.
// To transfer that indexed data directly to the search process, the following lines
// additionally push the index entry into the search process.
// This is done only for searched words (query-included and not query-excluded hashes).
if ( searchEvent ! = null & & ! searchEvent . getQuery ( ) . query_exclude_hashes . has ( wordhash ) & & searchEvent . getQuery ( ) . query_include_hashes . has ( wordhash ) ) {
// if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
ReferenceContainer < WordReference > container ;
try {
container = ReferenceContainer . emptyContainer ( Segment . wordReferenceFactory , wordhash , 1 ) ;
container . add ( ientry ) ;
rankingProcess . add ( container , true , sourceName , - 1 , 5000 ) ;
} catch ( final SpaceExceededException e ) {
continue ;
}
}
}
if ( rankingProcess ! = null ) rankingProcess . addFinalize ( ) ;
// assign the catchall word
ientry . setWord ( wprop = = null ? catchallWord : wprop ) ; // we use one of the word properties as template to get the document characteristics
if ( this . termIndex ! = null ) try {
this . termIndex . add ( catchallHash , ientry ) ;
} catch ( final Exception e ) {
Log . logException ( e ) ;
}
}
// STORE PAGE REFERENCES INTO CITATION INDEX
// STORE PAGE REFERENCES INTO CITATION INDEX