@ -327,21 +327,18 @@ public final class plasmaWordIndexDistribution {
while ( urlIter . hasNext ( ) ) {
indexEntry = ( plasmaWordIndexEntry ) urlIter . next ( ) ;
lurl = this . urlPool . loadedURL . getEntry ( indexEntry . getUrlHash ( ) ) ;
if ( ( lurl = = null ) | | ( lurl . toString ( ) = = null ) ) {
unknownURLEntries . add ( indexEntry . getUrlHash ( ) ) ;
} else {
if ( lurl . toString ( ) = = null ) {
this . urlPool . loadedURL . remove ( indexEntry . getUrlHash ( ) ) ;
if ( ( lurl = = null ) | | ( lurl . url ( ) = = null ) ) {
unknownURLEntries . add ( indexEntry . getUrlHash ( ) ) ;
} else {
knownURLs . put ( indexEntry . getUrlHash ( ) , lurl ) ;
}
}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries . iterator ( ) ;
while ( hashIter . hasNext ( ) ) {
indexEntity . removeEntry ( ( String ) hashIter . next ( ) , false ) ;
String nextUrlHash = ( String ) hashIter . next ( ) ;
indexEntity . removeEntry ( nextUrlHash , false ) ;
this . urlPool . loadedURL . remove ( nextUrlHash ) ;
}
if ( indexEntity . size ( ) = = 0 ) {
@ -366,11 +363,7 @@ public final class plasmaWordIndexDistribution {
while ( ( urlIter . hasNext ( ) ) & & ( count > 0 ) ) {
indexEntry = ( plasmaWordIndexEntry ) urlIter . next ( ) ;
lurl = this . urlPool . loadedURL . getEntry ( indexEntry . getUrlHash ( ) ) ;
if ( lurl = = null ) {
unknownURLEntries . add ( indexEntry . getUrlHash ( ) ) ;
} else {
if ( lurl . toString ( ) = = null ) {
this . urlPool . loadedURL . remove ( indexEntry . getUrlHash ( ) ) ;
if ( ( lurl = = null ) | | ( lurl . url ( ) = = null ) ) {
unknownURLEntries . add ( indexEntry . getUrlHash ( ) ) ;
} else {
knownURLs . put ( indexEntry . getUrlHash ( ) , lurl ) ;
@ -378,11 +371,12 @@ public final class plasmaWordIndexDistribution {
count - - ;
}
}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries . iterator ( ) ;
while ( hashIter . hasNext ( ) ) {
indexEntity . removeEntry ( ( String ) hashIter . next ( ) , true ) ;
String nextUrlHash = ( String ) hashIter . next ( ) ;
indexEntity . removeEntry ( nextUrlHash , true ) ;
this . urlPool . loadedURL . remove ( nextUrlHash ) ;
}
// use what's remaining
this . log . logFine ( "Selected partial index (" + tmpEntity . size ( ) + " from " + indexEntity . size ( ) + " URLs, " + unknownURLEntries . size ( ) + " not bound) for word " + tmpEntity . wordHash ( ) ) ;