enhanced remove operation in search consequences (which are triggered when the snippet fetch proves that the word has disappeared from the page that was stored in the index)
- no direct deletion of references during search (shifted to time after search)
- bundling of all deletions for the references of a single word into one remove operation
- enhanced remove operation by caring that the collection is stored sorted (experimental)
- more String -> byte[] transition for search word lists
- clean up of unused code
- enhanced memory allocation of RowSet Objects (will use a little bit less memory which was wasted before)
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6823 6c8d7289-2bf4-0310-a012-ef5d649a1542
protectedfinalSortStore<MediaSnippet>images;// container to sort images by size
protectedfinalHashMap<String,String>failedURLs;// a mapping from a urlhash to a fail reason string
protectedfinalHandleSet failedURLs;// a set of urlhashes that could not been verified during search
protectedfinalHandleSetsnippetFetchWordHashes;// a set of word hashes that are used to match with the snippets
longurlRetrievalAllTime;
longsnippetComputationAllTime;
@ -84,7 +84,7 @@ public class ResultFetcher {
this.snippetComputationAllTime=0;
this.result=newSortStore<ResultEntry>(-1,true);// this is the result, enriched with snippets, ranked and ordered by ranking
this.images=newSortStore<MediaSnippet>(-1,true);
this.failedURLs=newHashMap<String,String>();// a map of urls to reason strings where a worker thread tried to work on, but failed.
this.failedURLs=newHandleSet(URIMetadataRow.rowdef.primaryKeyLength,URIMetadataRow.rowdef.objectOrder,0);// a set of url hashes where a worker thread tried to work on, but failed.
// snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search
Log.logInfo("SEARCH_EVENT","text snippet load time for "+metadata.url()+": "+snippetComputationTime+", "+((snippet.getErrorCode()<11)?"snippet found":("no snippet found ("+snippet.getError()+")")));
Log.logInfo("SEARCH","text snippet load time for "+metadata.url()+": "+snippetComputationTime+", "+((snippet.getErrorCode()<11)?"snippet found":("no snippet found ("+snippet.getError()+")")));
if(snippet.getErrorCode()<11){
// we loaded the file and found the snippet
@ -241,13 +241,7 @@ public class ResultFetcher {
returnnewResultEntry(page,query.getSegment(),peers,null,null,dbRetrievalTime,snippetComputationTime);// result without snippet
}else{
// problems with snippet fetch
registerFailure(newString(page.hash()),"no text snippet for URL "+metadata.url());
Log.logInfo("SearchEvents","cleaning up event "+query.id(true)+", removed "+rw+" URL references on "+removeWords.size()+" words");
Log.logInfo("SearchEvents","cleaning up event "+query.id(true)+", removed "+rw+" URL references on "+removeWords.size()+" words in " +(System.currentTimeMillis()-start)+" milliseconds");