Replaced the RWI term counting method with one that computes the maximum of the sizes of the blobs that contribute to the RWI. Adding up the blob sizes is incorrect and does not reflect the real size. Truncating the size operation to the maximum of all blobs is also incorrect, but not as wrong as the sum of all blob sizes, which double-counts many RWI entries.
returnplasmaProfiling.performanceGraph(width,height,sb.indexSegment.urlMetadata().size()+" URLS / "+sb.indexSegment.termIndex().getBackendSize()+" WORDS IN INDEX / "+sb.indexSegment.termIndex().getBufferSize()+" WORDS IN CACHE");
returnplasmaProfiling.performanceGraph(width,height,sb.indexSegment.urlMetadata().size()+" URLS / "+sb.indexSegment.termIndex().sizesMax()+" WORDS IN INDEX / "+sb.indexSegment.termIndex().getBufferSize()+" WORDS IN CACHE");
@ -260,7 +260,22 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
// Reports the size of this index cell.
// NOTE(review): this span carries two alternative bodies back to back: a
// return of ram.size() + array.size(), followed by a throw of
// UnsupportedOperationException. As written the throw can never be reached.
// This looks like both sides of a change shown together; confirm which
// statement is the live one before relying on size().
publicintsize(){
returnthis.ram.size()+this.array.size();
thrownewUnsupportedOperationException("an accumulated size of index entries would not reflect the real number of words, which cannot be computed easily");
}
publicint[]sizes(){
int[]as=this.array.sizes();
int[]asr=newint[as.length+1];
System.arraycopy(as,0,asr,0,as.length);
asr[as.length]=this.ram.size();
returnasr;
}
publicintsizesMax(){
intm=0;
int[]s=sizes();
for(inti=0;i<s.length;i++)if(s[i]>m)m=s[i];
returnm;
}
publicintminMem(){
@ -312,10 +327,6 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
@ -94,8 +94,8 @@ public class plasmaDbImporter extends AbstractImporter implements Importer {
try{
this.log.logInfo("Importing DB from '"+this.importWordIndex.getLocation().getAbsolutePath()+"'");
this.log.logInfo("Home word index contains "+homeWordIndex.termIndex().size()+" words and "+homeWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Import word index contains "+this.importWordIndex.termIndex().size()+" words and "+this.importWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Home word index contains "+homeWordIndex.termIndex().sizesMax()+" words and "+homeWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Import word index contains "+this.importWordIndex.termIndex().sizesMax()+" words and "+this.importWordIndex.urlMetadata().size()+" URLs.");
" | Import Words = "+this.importWordIndex.termIndex().size());
"Home Words = "+homeWordIndex.termIndex().sizesMax()+
" | Import Words = "+this.importWordIndex.termIndex().sizesMax());
this.wordChunkStart=this.wordChunkEnd;
this.wordChunkStartHash=this.wordChunkEndHash;
}
@ -213,8 +213,8 @@ public class plasmaDbImporter extends AbstractImporter implements Importer {
}
}
this.log.logInfo("Home word index contains "+homeWordIndex.termIndex().size()+" words and "+homeWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Import word index contains "+this.importWordIndex.termIndex().size()+" words and "+this.importWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Home word index contains "+homeWordIndex.termIndex().sizesMax()+" words and "+homeWordIndex.urlMetadata().size()+" URLs.");
this.log.logInfo("Import word index contains "+this.importWordIndex.termIndex().sizesMax()+" words and "+this.importWordIndex.urlMetadata().size()+" URLs.");
@ -2149,7 +2149,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
peers.mySeed().put(yacySeed.LCOUNT,Integer.toString(indexSegment.urlMetadata().size()));// the number of links that the peer has stored (LURL's)
peers.mySeed().put(yacySeed.NCOUNT,Integer.toString(crawlQueues.noticeURL.size()));// the number of links that the peer has noticed, but not loaded (NURL's)
peers.mySeed().put(yacySeed.RCOUNT,Integer.toString(crawlQueues.noticeURL.stackSize(NoticedURL.STACK_TYPE_LIMIT)));// the number of links that the peer provides for remote crawling (ZURL's)
peers.mySeed().put(yacySeed.ICOUNT,Integer.toString(indexSegment.termIndex().size()));// the minimum number of words that the peer has indexed (as it says)
peers.mySeed().put(yacySeed.ICOUNT,Integer.toString(indexSegment.termIndex().sizesMax()));// the minimum number of words that the peer has indexed (as it says)
peers.mySeed().put(yacySeed.SCOUNT,Integer.toString(peers.sizeConnected()));// the number of seeds that the peer has stored
peers.mySeed().put(yacySeed.CCOUNT,Double.toString(((int)((peers.sizeConnected()+peers.sizeDisconnected()+peers.sizePotential())*60.0/(uptime+1.01))*100)/100.0));// the number of clients that the peer connects (as connects/hour)