@@ -51,7 +51,6 @@ import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument ;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread ;
import net.yacy.cora.date.ISO8601Formatter ;
import net.yacy.cora.document.analysis.Classification ;
import net.yacy.cora.document.analysis.Classification.ContentDomain ;
@@ -95,7 +94,6 @@ import net.yacy.peers.RemoteSearch;
import net.yacy.peers.SeedDB ;
import net.yacy.peers.graphics.ProfilingGraph ;
import net.yacy.repository.Blacklist.BlacklistType ;
import net.yacy.repository.FilterEngine ;
import net.yacy.repository.LoaderDispatcher ;
import net.yacy.search.EventTracker ;
import net.yacy.search.Switchboard ;
@@ -186,9 +184,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** a set of words that are used to match with the snippets */
private final Set < String > snippetFetchWords ;
private final boolean deleteIfSnippetFail ;
private long urlRetrievalAllTime ;
private long snippetComputationAllTime ;
private ConcurrentHashMap < String , LinkedHashSet < String > > snippets ;
private final long urlRetrievalAllTime ;
private final long snippetComputationAllTime ;
private final ConcurrentHashMap < String , LinkedHashSet < String > > snippets ;
private final boolean remote ;
/** add received results to local index (defult=true) */
@@ -283,7 +281,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
return Math . max (
this . local_rwi_available . get ( ) + this . remote_rwi_available . get ( ) +
this . remote_solr_available . get ( ) + Math . max ( 0 , this . local_solr_stored . get ( ) - this . local_solr_evicted . get ( ) ) ,
imageViewed. size ( ) + sizeSpare ( )
this . imageViewed. size ( ) + sizeSpare ( )
) ;
}
@@ -324,17 +322,17 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long ab = MemoryControl . available ( ) ;
if ( ab < 1024 * 1024 * 200 ) {
int eb = SearchEventCache . size ( ) ;
final int eb = SearchEventCache . size ( ) ;
SearchEventCache . cleanupEvents ( false ) ;
int en = SearchEventCache . size ( ) ;
final int en = SearchEventCache . size ( ) ;
if ( en < eb ) {
log . info ( "Cleaned up search event cache (1) " + eb + "->" + en + ", " + ( ab - MemoryControl . available ( ) ) / 1024 / 1024 + " MB freed" ) ;
}
}
ab = MemoryControl . available ( ) ;
int eb = SearchEventCache . size ( ) ;
final int eb = SearchEventCache . size ( ) ;
SearchEventCache . cleanupEvents ( Math . max ( 1 , ( int ) ( MemoryControl . available ( ) / ( 1024 * 1024 * 120 ) ) ) ) ;
int en = SearchEventCache . size ( ) ;
final int en = SearchEventCache . size ( ) ;
if ( en < eb ) {
log . info ( "Cleaned up search event cache (2) " + eb + "->" + en + ", " + ( ab - MemoryControl . available ( ) ) / 1024 / 1024 + " MB freed" ) ;
}
@@ -348,7 +346,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . imagePageCounter = query . offset ;
}
this . loader = loader ;
this . nodeStack = new WeakPriorityBlockingQueue < URIMetadataNode > ( max_results_node , false ) ;
this . nodeStack = new WeakPriorityBlockingQueue < > ( max_results_node , false ) ;
this . maxExpectedRemoteReferences = new AtomicInteger ( 0 ) ;
this . expectedRemoteReferences = new AtomicInteger ( 0 ) ;
this . excludeintext_image = Switchboard . getSwitchboard ( ) . getConfigBool ( "search.excludeintext.image" , true ) ;
@@ -377,7 +375,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap < > ( this ) : null ;
this . dateNavigator = dateNavEnabled ? new ConcurrentScoreMap < > ( this ) : null ;
this . topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0 ;
this . vocabularyNavigator = new TreeMap < String , ScoreMap < String > > ( ) ;
this . vocabularyNavigator = new TreeMap < > ( ) ;
// prepare configured search navigation (plugins)
this . navigatorPlugins = NavigatorPlugins . initFromCfgStrings ( navConfigs ) ;
if ( this . navigatorPlugins ! = null ) {
@@ -386,14 +384,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
}
this . snippets = new ConcurrentHashMap < String , LinkedHashSet < String > > ( ) ;
this . snippets = new ConcurrentHashMap < > ( ) ;
this . secondarySearchSuperviser = ( this . query . getQueryGoal ( ) . getIncludeHashes ( ) . size ( ) > 1 ) ? new SecondarySearchSuperviser ( this ) : null ; // generate abstracts only for combined searches
if ( this . secondarySearchSuperviser ! = null ) this . secondarySearchSuperviser . start ( ) ;
this . secondarySearchThreads = null ;
this . preselectedPeerHashes = preselectedPeerHashes ;
this . IAResults = new TreeMap < byte [ ] , String > ( Base64Order . enhancedCoder ) ;
this . IACount = new TreeMap < byte [ ] , Integer > ( Base64Order . enhancedCoder ) ;
this . heuristics = new TreeMap < byte [ ] , HeuristicResult > ( Base64Order . enhancedCoder ) ;
this . IAResults = new TreeMap < > ( Base64Order . enhancedCoder ) ;
this . IACount = new TreeMap < > ( Base64Order . enhancedCoder ) ;
this . heuristics = new TreeMap < > ( Base64Order . enhancedCoder ) ;
this . IAmaxcounthash = null ;
this . IAneardhthash = null ;
this . remote = ( peers ! = null & & peers . sizeConnected ( ) > 0 ) & & ( this . query . domType = = QueryParams . Searchdom . CLUSTER | | ( this . query . domType = = QueryParams . Searchdom . GLOBAL & & Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_ALLOW_SEARCH , false ) ) ) ;
@@ -420,10 +418,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this . localSearchInclusion = null ;
this . ref = new ConcurrentScoreMap < String > ( this ) ;
this . ref = new ConcurrentScoreMap < > ( this ) ;
this . maxtime = query . maxtime ;
this . rwiStack = new WeakPriorityBlockingQueue < WordReferenceVars > ( max_results_rwi , false ) ;
this . doubleDomCache = new ConcurrentHashMap < String , WeakPriorityBlockingQueue < WordReferenceVars > > ( ) ;
this . rwiStack = new WeakPriorityBlockingQueue < > ( max_results_rwi , false ) ;
this . doubleDomCache = new ConcurrentHashMap < > ( ) ;
this . flagcount = new int [ 32 ] ;
for ( int i = 0 ; i < 32 ; i + + ) {
this . flagcount [ i ] = 0 ;
@@ -435,8 +433,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . receivedRemoteReferences = new AtomicInteger ( 0 ) ;
this . order = new ReferenceOrder ( this . query . ranking , this . query . targetlang ) ;
this . urlhashes = new RowHandleSet ( Word . commonHashLength , Word . commonHashOrder , 100 ) ;
this . taggingPredicates = new HashMap < String , String > ( ) ;
for ( Tagging t : LibraryProvider . autotagging . getVocabularies ( ) ) {
this . taggingPredicates = new HashMap < > ( ) ;
for ( final Tagging t : LibraryProvider . autotagging . getVocabularies ( ) ) {
this . taggingPredicates . put ( t . getName ( ) , t . getPredicate ( ) ) ;
}
@@ -453,8 +451,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . rwiProcess = null ;
if ( query . getSegment ( ) . connectedRWI ( ) & & ! Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . DEBUG_SEARCH_LOCAL_DHT_OFF , false ) ) {
// we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old
rwiProcess = new RWIProcess ( this . localsolrsearch ) ;
rwiProcess. start ( ) ;
this . rwiProcess = new RWIProcess ( this . localsolrsearch ) ;
this . rwiProcess. start ( ) ;
}
if ( this . remote ) {
@@ -465,8 +463,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . primarySearchThreadsL = null ;
this . nodeSearchThreads = null ;
} else {
this . primarySearchThreadsL = new ArrayList < RemoteSearch > ( ) ;
this . nodeSearchThreads = new ArrayList < Thread > ( ) ;
this . primarySearchThreadsL = new ArrayList < > ( ) ;
this . nodeSearchThreads = new ArrayList < > ( ) ;
// start this concurrently because the remote search needs an enumeration
// of the remote peers which may block in some cases when i.e. DHT is active
// at the same time.
@@ -502,7 +500,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( generateAbstracts ) {
// we need the results now
try {
if ( rwiProcess ! = null & & query . getSegment ( ) . connectedRWI ( ) ) rwiProcess. join ( ) ;
if ( this . rwiProcess ! = null & & query . getSegment ( ) . connectedRWI ( ) ) this . rwiProcess. join ( ) ;
} catch ( final Throwable e ) {
}
// compute index abstracts
@@ -535,7 +533,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// give process time to accumulate a certain amount of data
// before a reading process wants to get results from it
try {
if ( rwiProcess ! = null & & query . getSegment ( ) . connectedRWI ( ) & & rwiProcess . isAlive ( ) ) rwiProcess. join ( 100 ) ;
if ( this . rwiProcess ! = null & & query . getSegment ( ) . connectedRWI ( ) & & this . rwiProcess . isAlive ( ) ) this . rwiProcess. join ( 100 ) ;
} catch ( final Throwable e ) {
}
// this will reduce the maximum waiting time until results are available to 100 milliseconds
@@ -547,14 +545,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . deleteIfSnippetFail = deleteIfSnippetFail ;
this . urlRetrievalAllTime = 0 ;
this . snippetComputationAllTime = 0 ;
this . resultList = new WeakPriorityBlockingQueue < URIMetadataNode > ( Math . max ( max_results_node , 10 * query . itemsPerPage ( ) ) , true ) ; // this is the result, enriched with snippets, ranked and ordered by ranking
this . resultList = new WeakPriorityBlockingQueue < > ( Math . max ( max_results_node , 10 * query . itemsPerPage ( ) ) , true ) ; // this is the result, enriched with snippets, ranked and ordered by ranking
// snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search
boolean filtered = false ;
// check if query contains stopword
if ( Switchboard . stopwordHashes ! = null ) {
Iterator < byte [ ] > it = query . getQueryGoal ( ) . getIncludeHashes ( ) . iterator ( ) ;
final Iterator < byte [ ] > it = query . getQueryGoal ( ) . getIncludeHashes ( ) . iterator ( ) ;
while ( it . hasNext ( ) ) {
if ( Switchboard . stopwordHashes . contains ( ( it . next ( ) ) ) ) {
filtered = true ;
@@ -600,7 +598,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
@Override
public void run ( ) {
if ( query. getSegment ( ) . termIndex ( ) = = null ) return ; // nothing to do; this index is not used
if ( SearchEvent. this . query. getSegment ( ) . termIndex ( ) = = null ) return ; // nothing to do; this index is not used
// do a search
oneFeederStarted ( ) ;
@@ -634,7 +632,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
SearchEvent . this . query . modifier . sitehost ! = null & & SearchEvent . this . query . modifier . sitehost . length ( ) > 0
) {
// try again with sitehost
String newGoal = Domains . getSmartSLD ( SearchEvent . this . query . modifier . sitehost ) ;
final String newGoal = Domains . getSmartSLD ( SearchEvent . this . query . modifier . sitehost ) ;
search =
SearchEvent . this . query
. getSegment ( )
@@ -695,7 +693,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// normalize entries
final BlockingQueue < WordReferenceVars > decodedEntries = this . order . normalizeWith ( index , maxtime , local ) ;
int is = index . size ( ) ;
final int is = index . size ( ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch (
this . query . id ( true ) ,
SearchEventType . NORMALIZING ,
@@ -708,7 +706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
timer = System . currentTimeMillis ( ) ;
// apply all constraints
long timeout = maxtime = = Long . MAX_VALUE ? Long . MAX_VALUE : System . currentTimeMillis ( ) + maxtime ;
final long timeout = maxtime = = Long . MAX_VALUE ? Long . MAX_VALUE : System . currentTimeMillis ( ) + maxtime ;
int successcounter = 0 ;
try {
WordReferenceVars iEntry ;
@@ -716,7 +714,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
String acceptableAlternativeSitehash = null ;
if ( this . query . modifier . sitehost ! = null & & this . query . modifier . sitehost . length ( ) > 0 ) try {
acceptableAlternativeSitehash = DigestURL . hosthash ( this . query . modifier . sitehost . startsWith ( "www." ) ? this . query . modifier . sitehost . substring ( 4 ) : "www." + this . query . modifier . sitehost , 80 ) ;
} catch ( MalformedURLException e1 ) { }
} catch ( final MalformedURLException e1 ) { }
pollloop : while ( true ) {
remaining = timeout - System . currentTimeMillis ( ) ;
if ( remaining < = 0 ) {
@@ -740,7 +738,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// increase flag counts
Bitfield flags = iEntry . flags ( ) ;
final Bitfield flags = iEntry . flags ( ) ;
for ( int j = 0 ; j < 32 ; j + + ) {
if ( flags . get ( j ) ) this . flagcount [ j ] + + ;
}
@@ -806,7 +804,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this . urlhashes . putUnique ( iEntry . urlhash ( ) ) ;
rankingtryloop : while ( true ) {
try {
this . rwiStack . put ( new ReverseElement < WordReferenceVars > ( iEntry , this . order . cardinal ( iEntry ) ) ) ; // inserts the element and removes the worst (which is smallest)
this . rwiStack . put ( new ReverseElement < > ( iEntry , this . order . cardinal ( iEntry ) ) ) ; // inserts the element and removes the worst (which is smallest)
break rankingtryloop ;
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@@ -821,8 +819,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
if ( System . currentTimeMillis ( ) > = timeout ) ConcurrentLog . warn ( "SearchEvent" , "rwi normalization ended with timeout = " + maxtime ) ;
} catch ( final InterruptedException e ) {
} catch ( final SpaceExceededException e ) {
} catch ( final InterruptedException | SpaceExceededException e ) {
}
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
@@ -847,7 +844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// stop all threads
if ( this . localsolrsearch ! = null ) {
if ( localsolrsearch. isAlive ( ) ) synchronized ( this . localsolrsearch ) { this . localsolrsearch . interrupt ( ) ; }
if ( this . localsolrsearch. isAlive ( ) ) synchronized ( this . localsolrsearch ) { this . localsolrsearch . interrupt ( ) ; }
}
if ( this . nodeSearchThreads ! = null ) {
for ( final Thread search : this . nodeSearchThreads ) {
@@ -969,7 +966,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long timer = System . currentTimeMillis ( ) ;
// normalize entries
int is = nodeList . size ( ) ;
final int is = nodeList . size ( ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch ( this . query . id ( true ) , SearchEventType . NORMALIZING , resourceName , is , System . currentTimeMillis ( ) - timer ) , false ) ;
if ( ! local ) {
this . receivedRemoteReferences . addAndGet ( is ) ;
@@ -985,7 +982,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply all constraints
try {
pollloop : for ( URIMetadataNode iEntry : nodeList ) {
pollloop : for ( final URIMetadataNode iEntry : nodeList ) {
// check url related eventual constraints (protocol, tld, sitehost, and filetype)
final String matchingResult = QueryParams . matchesURL ( this . query . modifier , this . query . tld , iEntry . url ( ) ) ;
@@ -1019,7 +1016,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check constraints
Bitfield flags = iEntry . flags ( ) ;
final Bitfield flags = iEntry . flags ( ) ;
if ( ! this . testFlags ( flags ) ) {
if ( log . isFine ( ) ) log . fine ( "dropped Node: flag test" ) ;
updateCountsOnSolrEntryToEvict ( iEntry , facets , local , ! incrementNavigators ) ;
@@ -1049,7 +1046,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// filter out media links in text search, if wanted
String ext = MultiProtocolURL . getFileExtension ( iEntry . url ( ) . getFileName ( ) ) ;
final String ext = MultiProtocolURL . getFileExtension ( iEntry . url ( ) . getFileName ( ) ) ;
if ( this . query . contentdom = = ContentDomain . TEXT & & Classification . isImageExtension ( ext ) & & this . excludeintext_image ) {
if ( log . isFine ( ) ) log . fine ( "dropped Node: file name domain does not match" ) ;
updateCountsOnSolrEntryToEvict ( iEntry , facets , local , ! incrementNavigators ) ;
@@ -1097,12 +1094,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long score ;
// determine nodestack ranking (will be altered by postranking)
// so far Solr score is used (with abitrary factor to get value similar to rwi ranking values)
Float scorex = ( Float ) iEntry . getFieldValue ( "score" ) ; // this is a special field containing the ranking score of a Solr search result
final Float scorex = ( Float ) iEntry . getFieldValue ( "score" ) ; // this is a special field containing the ranking score of a Solr search result
if ( scorex ! = null & & scorex > 0 )
score = ( long ) ( ( 1000000.0f * scorex ) - iEntry . urllength ( ) ) ; // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
else
score = this . order . cardinal ( iEntry ) ;
this . nodeStack . put ( new ReverseElement < URIMetadataNode > ( iEntry , score ) ) ; // inserts the element and removes the worst (which is smallest)
this . nodeStack . put ( new ReverseElement < > ( iEntry , score ) ) ; // inserts the element and removes the worst (which is smallest)
break rankingtryloop ;
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@@ -1131,8 +1128,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
private void incrNavigatorsFromSolrFacets ( final Map < String , ReversibleScoreMap < String > > facets ) {
if ( facets ! = null & & ! facets . isEmpty ( ) ) {
/* Iterate over active navigator plugins to let them update the counters */
for ( String s : this . navigatorPlugins . keySet ( ) ) {
Navigator navi = this . navigatorPlugins . get ( s ) ;
for ( final String s : this . navigatorPlugins . keySet ( ) ) {
final Navigator navi = this . navigatorPlugins . get ( s ) ;
if ( navi ! = null ) {
navi . incFacet ( facets ) ;
}
@@ -1144,8 +1141,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* is expressed as a spatial filter not producing facets counts ( see QueryParams . getFacetsFilterQueries ( ) ) . * /
fcts = facets . get ( CollectionSchema . coordinate_p_0_coordinate . getSolrFieldName ( ) ) ;
if ( fcts ! = null ) {
for ( String coordinate : fcts ) {
int hc = fcts . get ( coordinate ) ;
for ( final String coordinate : fcts ) {
final int hc = fcts . get ( coordinate ) ;
if ( hc = = 0 ) continue ;
this . locationNavigator . inc ( coordinate , hc ) ;
}
@@ -1161,9 +1158,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
fcts = facets . get ( CollectionSchema . url_protocol_s . getSolrFieldName ( ) ) ;
if ( fcts ! = null ) {
// remove all protocols that we don't know
Iterator < String > i = fcts . iterator ( ) ;
final Iterator < String > i = fcts . iterator ( ) ;
while ( i . hasNext ( ) ) {
String protocol = i . next ( ) ;
final String protocol = i . next ( ) ;
if ( PROTOCOL_NAVIGATOR_SUPPORTED_VALUES . indexOf ( protocol ) < 0 ) {
i . remove ( ) ;
}
@@ -1173,15 +1170,15 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// get the vocabulary navigation
Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) genericFacets . add ( v . getName ( ) ) ;
final Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( final Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) genericFacets . add ( v . getName ( ) ) ;
genericFacets . addAll ( ProbabilisticClassifier . getContextNames ( ) ) ;
for ( String vocName : genericFacets ) {
for ( final String vocName : genericFacets ) {
fcts = facets . get ( CollectionSchema . VOCABULARY_PREFIX + vocName + CollectionSchema . VOCABULARY_TERMS_SUFFIX ) ;
if ( fcts ! = null ) {
ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
if ( vocNav = = null ) {
vocNav = new ConcurrentScoreMap < String > ( ) ;
vocNav = new ConcurrentScoreMap < > ( ) ;
this . vocabularyNavigator . put ( vocName , vocNav ) ;
}
vocNav . inc ( fcts ) ;
@@ -1199,8 +1196,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Map < String , ReversibleScoreMap < String > > facets ) {
/* Iterate over active navigator plugins to let them update the counters */
for ( String s : this . navigatorPlugins . keySet ( ) ) {
Navigator navi = this . navigatorPlugins . get ( s ) ;
for ( final String s : this . navigatorPlugins . keySet ( ) ) {
final Navigator navi = this . navigatorPlugins . get ( s ) ;
if ( navi ! = null & & facets = = null | | ! facets . containsKey ( navi . getIndexFieldName ( ) ) ) {
navi . incDoc ( doc ) ;
}
@@ -1211,7 +1208,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( this . dateNavigator ! = null ) {
if ( facets = = null | | ! facets . containsKey ( CollectionSchema . dates_in_content_dts . getSolrFieldName ( ) ) ) {
Date [ ] dates = doc . datesInContent ( ) ;
final Date [ ] dates = doc . datesInContent ( ) ;
if ( dates ! = null ) {
for ( final Date date : dates ) {
if ( date ! = null ) {
@@ -1234,12 +1231,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation
if ( this . vocabularyNavigator ! = null ) {
Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
final Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( final Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
genericFacets . add ( v . getName ( ) ) ;
}
genericFacets . addAll ( ProbabilisticClassifier . getContextNames ( ) ) ;
for ( String vocName : genericFacets ) {
for ( final String vocName : genericFacets ) {
final String fieldName = CollectionSchema . VOCABULARY_PREFIX + vocName + CollectionSchema . VOCABULARY_TERMS_SUFFIX ;
if ( facets = = null | | ! facets . containsKey ( fieldName ) ) {
incrementVocNavigator ( doc , vocName , fieldName ) ;
@@ -1259,7 +1256,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( docValue instanceof String ) {
ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
if ( vocNav = = null ) {
vocNav = new ConcurrentScoreMap < String > ( ) ;
vocNav = new ConcurrentScoreMap < > ( ) ;
this . vocabularyNavigator . put ( vocName , vocNav ) ;
}
vocNav . inc ( ( String ) docValue ) ;
@@ -1267,7 +1264,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( ! ( ( Collection < ? > ) docValue ) . isEmpty ( ) ) {
ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
if ( vocNav = = null ) {
vocNav = new ConcurrentScoreMap < String > ( ) ;
vocNav = new ConcurrentScoreMap < > ( ) ;
this . vocabularyNavigator . put ( vocName , vocNav ) ;
}
for ( final Object singleDocValue : ( Collection < ? > ) docValue ) {
@@ -1306,7 +1303,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
rwi = this . rwiStack . poll ( ) ;
if ( rwi = = null ) return null ;
if ( ! skipDoubleDom ) {
URIMetadataNode node = this . query . getSegment ( ) . fulltext ( ) . getMetadata ( rwi ) ;
final URIMetadataNode node = this . query . getSegment ( ) . fulltext ( ) . getMetadata ( rwi ) ;
if ( node = = null ) {
decrementCounts ( rwi . getElement ( ) ) ;
continue pollloop ;
@@ -1322,9 +1319,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
m = this . doubleDomCache . get ( hosthash ) ;
if ( m = = null ) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue < WordReferenceVars > ( max_results_rwi , false ) ;
m = new WeakPriorityBlockingQueue < > ( max_results_rwi , false ) ;
this . doubleDomCache . put ( hosthash , m ) ;
URIMetadataNode node = this . query . getSegment ( ) . fulltext ( ) . getMetadata ( rwi ) ;
final URIMetadataNode node = this . query . getSegment ( ) . fulltext ( ) . getMetadata ( rwi ) ;
if ( node = = null ) {
decrementCounts ( rwi . getElement ( ) ) ;
continue pollloop ;
@@ -1390,7 +1387,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
URIMetadataNode node = null ;
try {
node = this . query . getSegment ( ) . fulltext ( ) . getMetadata ( bestEntry ) ;
} catch ( Throwable e ) {
} catch ( final Throwable e ) {
ConcurrentLog . logException ( e ) ;
}
if ( node = = null ) {
@@ -1442,7 +1439,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check content domain
ContentDomain contentDomain = page . getContentDomain ( ) ;
final ContentDomain contentDomain = page . getContentDomain ( ) ;
if ( this . query . contentdom . getCode ( ) > 0 & & (
( this . query . contentdom = = Classification . ContentDomain . IMAGE & & contentDomain ! = Classification . ContentDomain . IMAGE ) | |
( this . query . contentdom = = Classification . ContentDomain . AUDIO & & contentDomain ! = Classification . ContentDomain . AUDIO ) | |
@@ -1454,7 +1451,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// filter out media links in text search, if wanted
String ext = MultiProtocolURL . getFileExtension ( page . url ( ) . getFileName ( ) ) ;
final String ext = MultiProtocolURL . getFileExtension ( page . url ( ) . getFileName ( ) ) ;
if ( this . query . contentdom = = ContentDomain . TEXT & & Classification . isImageExtension ( ext ) & & this . excludeintext_image ) {
if ( log . isFine ( ) ) log . fine ( "dropped RWI: file name domain does not match" ) ;
continue ;
@@ -1480,7 +1477,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check modifier constraint collection
// this is not available in pure RWI entries (but in local or via solr query received metadate/entries),
if ( this . query . modifier . collection ! = null ) {
Collection < Object > docCols = page . getFieldValues ( CollectionSchema . collection_sxt . getSolrFieldName ( ) ) ; // get multivalued value
final Collection < Object > docCols = page . getFieldValues ( CollectionSchema . collection_sxt . getSolrFieldName ( ) ) ; // get multivalued value
if ( docCols = = null ) { // no collection info
decrementCounts ( page . word ( ) ) ;
continue ;
@@ -1504,16 +1501,6 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
continue ;
}
// content control
if ( Switchboard . getSwitchboard ( ) . getConfigBool ( "contentcontrol.enabled" , false ) ) {
FilterEngine f = ContentControlFilterUpdateThread . getNetworkFilter ( ) ;
if ( f ! = null & & ! f . isListed ( page . url ( ) , null ) ) {
if ( log . isFine ( ) ) log . fine ( "dropped RWI: url is blacklisted in contentcontrol" ) ;
decrementCounts ( page . word ( ) ) ;
continue ;
}
}
final String pageurl = page . url ( ) . toNormalform ( true ) ;
final String pageauthor = page . dc_creator ( ) ;
final String pagetitle = page . dc_title ( ) . toLowerCase ( ) ;
@@ -1551,9 +1538,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check geo coordinates
double lat , lon ;
if ( this . query . radius > 0.0d & & this . query . lat ! = 0.0d & & this . query . lon ! = 0.0d & & ( lat = page . lat ( ) ) ! = 0.0d & & ( lon = page . lon ( ) ) ! = 0.0d ) {
double latDelta = this . query . lat - lat ;
double lonDelta = this . query . lon - lon ;
double distance = Math . sqrt ( latDelta * latDelta + lonDelta * lonDelta ) ; // pythagoras
final double latDelta = this . query . lat - lat ;
final double lonDelta = this . query . lon - lon ;
final double distance = Math . sqrt ( latDelta * latDelta + lonDelta * lonDelta ) ; // pythagoras
if ( distance > this . query . radius ) {
if ( log . isFine ( ) ) log . fine ( "dropped RWI: radius constraint" ) ;
decrementCounts ( page . word ( ) ) ;
@@ -1564,10 +1551,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field}
// TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL)
if ( this . query . metatags ! = null & & ! this . query . metatags . isEmpty ( ) ) {
tagloop : for ( Tagging . Metatag tag : this . query . metatags ) {
SolrDocument sdoc = page ;
tagloop : for ( final Tagging . Metatag tag : this . query . metatags ) {
final SolrDocument sdoc = page ;
if ( sdoc ! = null ) {
Collection < Object > tagvalues = sdoc . getFieldValues ( CollectionSchema . VOCABULARY_PREFIX + tag . getVocabularyName ( ) + CollectionSchema . VOCABULARY_TERMS_SUFFIX ) ;
final Collection < Object > tagvalues = sdoc . getFieldValues ( CollectionSchema . VOCABULARY_PREFIX + tag . getVocabularyName ( ) + CollectionSchema . VOCABULARY_TERMS_SUFFIX ) ;
if ( tagvalues ! = null & & tagvalues . contains ( tag . getObject ( ) ) ) {
continue tagloop ; // metatag exists check next tag (filter may consist of several tags)
}
@@ -1582,8 +1569,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// TODO: it may be a little bit late here, to update navigator counters
// iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts)
for ( String s : this . navigatorPlugins . keySet ( ) ) {
Navigator navi = this . navigatorPlugins . get ( s ) ;
for ( final String s : this . navigatorPlugins . keySet ( ) ) {
final Navigator navi = this . navigatorPlugins . get ( s ) ;
if ( navi ! = null ) {
navi . incDoc ( page ) ;
}
@@ -1597,7 +1584,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
if ( this . dateNavigator ! = null ) {
Date [ ] dates = page . datesInContent ( ) ;
final Date [ ] dates = page . datesInContent ( ) ;
if ( dates ! = null ) {
for ( final Date date : dates ) {
if ( date ! = null ) {
@@ -1609,8 +1596,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// handle the vocabulary navigator
if ( this . vocabularyNavigator ! = null ) {
Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
final Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( final Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
genericFacets . add ( v . getName ( ) ) ;
}
genericFacets . addAll ( ProbabilisticClassifier . getContextNames ( ) ) ;
@@ -1674,8 +1661,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/* Iterate over active navigator plugins to let them update the counters */
for ( String s : this . navigatorPlugins . keySet ( ) ) {
Navigator navi = this . navigatorPlugins . get ( s ) ;
for ( final String s : this . navigatorPlugins . keySet ( ) ) {
final Navigator navi = this . navigatorPlugins . get ( s ) ;
if ( navi ! = null ) {
if ( navIncrementedWithFacets ) {
fcts = facets . get ( navi . getIndexFieldName ( ) ) ;
@@ -1719,7 +1706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else {
fcts = null ;
}
Date [ ] dates = entry . datesInContent ( ) ;
final Date [ ] dates = entry . datesInContent ( ) ;
if ( dates ! = null ) {
for ( final Date date : dates ) {
if ( date ! = null ) {
@@ -1752,12 +1739,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation
if ( this . vocabularyNavigator ! = null ) {
Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
final Set < String > genericFacets = new LinkedHashSet < > ( ) ;
for ( final Tagging v : LibraryProvider . autotagging . getVocabularies ( ) ) {
genericFacets . add ( v . getName ( ) ) ;
}
genericFacets . addAll ( ProbabilisticClassifier . getContextNames ( ) ) ;
for ( String vocName : genericFacets ) {
for ( final String vocName : genericFacets ) {
final String fieldName = CollectionSchema . VOCABULARY_PREFIX + vocName
+ CollectionSchema . VOCABULARY_TERMS_SUFFIX ;
if ( navIncrementedWithFacets ) {
@ -1765,20 +1752,20 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else {
fcts = null ;
}
Object docValue = entry . getFieldValue ( fieldName ) ;
final Object docValue = entry . getFieldValue ( fieldName ) ;
if ( docValue instanceof String ) {
if ( navIncrementedEarlier | | ( fcts ! = null & & fcts . containsKey ( ( String ) docValue ) ) ) {
ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
final ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
if ( vocNav ! = null & & vocNav . get ( ( String ) docValue ) > 0 ) {
vocNav . dec ( ( String ) docValue ) ;
}
}
} else if ( docValue instanceof Collection ) {
if ( ! ( ( Collection < ? > ) docValue ) . isEmpty ( ) ) {
for ( Object singleDocValue : ( Collection < ? > ) docValue ) {
for ( final Object singleDocValue : ( Collection < ? > ) docValue ) {
if ( singleDocValue instanceof String ) {
if ( navIncrementedEarlier | | ( fcts ! = null & & fcts . containsKey ( ( String ) singleDocValue ) ) ) {
ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
final ScoreMap < String > vocNav = this . vocabularyNavigator . get ( vocName ) ;
if ( vocNav ! = null & & vocNav . get ( ( String ) singleDocValue ) > 0 ) {
vocNav . dec ( ( String ) singleDocValue ) ;
}
@ -1815,10 +1802,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( this . ref . size ( ) < = ic ) { // size matches return map directly
result = this . getTopics ( /*ic, 500*/ ) ;
} else { // collect top most count topics
result = new ConcurrentScoreMap < String > ( ) ;
Iterator < String > it = this . getTopics ( /*ic, 500*/ ) . keys ( false ) ;
result = new ConcurrentScoreMap < > ( ) ;
final Iterator < String > it = this . getTopics ( /*ic, 500*/ ) . keys ( false ) ;
while ( ic - - > 0 & & it . hasNext ( ) ) {
String word = it . next ( ) ;
final String word = it . next ( ) ;
result . set ( word , this . ref . get ( word ) ) ;
}
}
@ -1836,8 +1823,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* /
public boolean drainStacksToResult ( boolean concurrentSnippetFetch ) {
// we take one entry from both stacks at the same time
boolean solrSuccess = drainSolrStackToResult ( concurrentSnippetFetch ) ;
boolean rwiSuccess = drainRWIStackToResult ( concurrentSnippetFetch ) ;
final boolean solrSuccess = drainSolrStackToResult ( concurrentSnippetFetch ) ;
final boolean rwiSuccess = drainRWIStackToResult ( concurrentSnippetFetch ) ;
return solrSuccess | | rwiSuccess ;
}
@ -1857,7 +1844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
success = true ;
}
} else {
Thread t = new Thread ( "SearchEvent.drainStacksToResult.oneFilteredFromRWI" ) {
final Thread t = new Thread ( "SearchEvent.drainStacksToResult.oneFilteredFromRWI" ) {
@Override
public void run ( ) {
SearchEvent . this . oneFeederStarted ( ) ;
@ -1894,7 +1881,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Element < URIMetadataNode > localEntryElement = this . nodeStack . sizeQueue ( ) > 0 ? this . nodeStack . poll ( ) : null ;
final URIMetadataNode node = localEntryElement = = null ? null : localEntryElement . getElement ( ) ;
if ( node ! = null ) {
LinkedHashSet < String > solrsnippetlines = this . snippets . remove ( ASCII . String ( node . hash ( ) ) ) ; // we can remove this because it's used only once
final LinkedHashSet < String > solrsnippetlines = this . snippets . remove ( ASCII . String ( node . hash ( ) ) ) ; // we can remove this because it's used only once
if ( solrsnippetlines ! = null & & solrsnippetlines . size ( ) > 0 ) {
OpensearchResponseWriter . removeSubsumedTitle ( solrsnippetlines , node . dc_title ( ) ) ;
final TextSnippet solrsnippet = new TextSnippet ( node . url ( ) , OpensearchResponseWriter . getLargestSnippet ( solrsnippetlines ) , true , ResultClass . SOURCE_SOLR , "" ) ;
@ -1908,7 +1895,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
false ) ;
final String solrsnippetline = solrsnippet . descriptionline ( this . getQuery ( ) . getQueryGoal ( ) ) ;
final String yacysnippetline = yacysnippet . descriptionline ( this . getQuery ( ) . getQueryGoal ( ) ) ;
URIMetadataNode re = node . makeResultEntry ( this . query . getSegment ( ) , this . peers , solrsnippetline . length ( ) > yacysnippetline . length ( ) ? solrsnippet : yacysnippet ) ;
final URIMetadataNode re = node . makeResultEntry ( this . query . getSegment ( ) , this . peers , solrsnippetline . length ( ) > yacysnippetline . length ( ) ? solrsnippet : yacysnippet ) ;
addResult ( re , localEntryElement . getWeight ( ) ) ;
success = true ;
} else {
@ -1955,8 +1942,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
resultEntry . setScore ( ranking ) ; // update the score of resultEntry for access by search interface / api
this . resultList . put ( new ReverseElement < URIMetadataNode > ( resultEntry , ranking ) ) ; // remove smallest in case of overflow
if ( pollImmediately) this . resultList . poll ( ) ; // prevent re-ranking in case there is only a single index source which has already ranked entries.
this . resultList . put ( new ReverseElement < > ( resultEntry , ranking ) ) ; // remove smallest in case of overflow
if ( this . pollImmediately) this . resultList . poll ( ) ; // prevent re-ranking in case there is only a single index source which has already ranked entries.
this . addTopics ( resultEntry ) ;
}
@ -1984,7 +1971,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
if ( this . query . getSegment ( ) . connectedCitation ( ) ) {
int referencesCount = this . query . getSegment ( ) . urlCitation ( ) . count ( rentry . hash ( ) ) ;
final int referencesCount = this . query . getSegment ( ) . urlCitation ( ) . count ( rentry . hash ( ) ) ;
r + = ( 128 * referencesCount / ( 1 + 2 * rentry . llocal ( ) + rentry . lother ( ) ) ) < < this . query . ranking . coeff_citation ;
}
// prefer hit with 'prefer' pattern
@ -2002,11 +1989,11 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for ( final String urlcomp : urlcompmap ) {
int tc = topwords . get ( urlcomp ) ;
final int tc = topwords . get ( urlcomp ) ;
if ( tc > 0 ) r + = tc < < this . query . ranking . coeff_urlcompintoplist ;
}
for ( final String descrcomp : descrcompmap ) {
int tc = topwords . get ( descrcomp ) ;
final int tc = topwords . get ( descrcomp ) ;
if ( tc > 0 ) r + = tc < < this . query . ranking . coeff_descrcompintoplist ;
}
@ -2037,10 +2024,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// load snippet
ContentDomain contentDomain = page . getContentDomain ( ) ;
final ContentDomain contentDomain = page . getContentDomain ( ) ;
if ( contentDomain = = Classification . ContentDomain . TEXT | | contentDomain = = Classification . ContentDomain . ALL ) {
// attach text snippet
long startTime = System . currentTimeMillis ( ) ;
final long startTime = System . currentTimeMillis ( ) ;
final TextSnippet snippet = new TextSnippet (
this . loader ,
page ,
@ -2110,7 +2097,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* For local only search , a new SearchEvent should be created , starting directly at the requested offset ,
* thus allowing to handle last pages of large resultsets
* /
int nextitems = item - this . localsolroffset + this . query . itemsPerPage ; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
final int nextitems = item - this . localsolroffset + this . query . itemsPerPage ; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
if ( this . localsolrsearch ! = null & & this . localsolrsearch . isAlive ( ) ) { try { this . localsolrsearch . join ( ) ; } catch ( final InterruptedException e ) { } }
if ( ! Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . DEBUG_SEARCH_LOCAL_SOLR_OFF , false ) ) {
// Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added
@ -2175,43 +2162,43 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** Image results counter */
private int imagePageCounter = 0;
/** Images already delivered to the client, keyed by image URL hash */
private final LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<>();
/** Pending images that matched the query goal — served before bad spares */
private final LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<>();
/** Pending images that did not match the query goal — served last */
private final LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<>();
private ImageResult nthImage ( int item ) {
Object o = SetTools . nth ( this . imageViewed . values ( ) , item ) ;
final Object o = SetTools . nth ( this . imageViewed . values ( ) , item ) ;
if ( o = = null ) return null ;
return ( ImageResult ) o ;
}
private boolean hasSpare ( ) {
return imageSpareGood. size ( ) > 0 | | imageSpareBad. size ( ) > 0 ;
return this . imageSpareGood. size ( ) > 0 | | this . imageSpareBad. size ( ) > 0 ;
}
private boolean containsSpare ( String id ) {
return imageSpareGood . containsKey ( id ) | | imageSpareBad. containsKey ( id ) ;
return this . imageSpareGood . containsKey ( id ) | | this . imageSpareBad. containsKey ( id ) ;
}
/** @return the total number of pending spare images (good plus bad) */
private int sizeSpare() {
    return this.imageSpareGood.size() + this.imageSpareBad.size();
}
private ImageResult nextSpare ( ) {
if ( imageSpareGood. size ( ) > 0 ) {
Map . Entry < String , ImageResult > next = imageSpareGood. entrySet ( ) . iterator ( ) . next ( ) ;
imageViewed. put ( next . getKey ( ) , next . getValue ( ) ) ;
imageSpareGood. remove ( next . getKey ( ) ) ;
if ( this . imageSpareGood. size ( ) > 0 ) {
final Map . Entry < String , ImageResult > next = this . imageSpareGood. entrySet ( ) . iterator ( ) . next ( ) ;
this . imageViewed. put ( next . getKey ( ) , next . getValue ( ) ) ;
this . imageSpareGood. remove ( next . getKey ( ) ) ;
return next . getValue ( ) ;
}
if ( imageSpareBad. size ( ) > 0 ) {
Map . Entry < String , ImageResult > next = imageSpareBad. entrySet ( ) . iterator ( ) . next ( ) ;
imageViewed. put ( next . getKey ( ) , next . getValue ( ) ) ;
imageSpareBad. remove ( next . getKey ( ) ) ;
if ( this . imageSpareBad. size ( ) > 0 ) {
final Map . Entry < String , ImageResult > next = this . imageSpareBad. entrySet ( ) . iterator ( ) . next ( ) ;
this . imageViewed. put ( next . getKey ( ) , next . getValue ( ) ) ;
this . imageSpareBad. remove ( next . getKey ( ) ) ;
return next . getValue ( ) ;
}
return null ;
}
public ImageResult oneImageResult ( final int item , final long timeout , final boolean strictContentDom ) throws MalformedURLException {
if ( item < imageViewed. size ( ) ) return nthImage ( item ) ;
if ( imageSpareGood. size ( ) > 0 ) return nextSpare ( ) ; // first put out all good spare, but no bad spare
URIMetadataNode doc = oneResult ( imagePageCounter+ + , timeout ) ; // we must use a different counter here because the image counter can be higher when one page filled up several spare
if ( item < this . imageViewed. size ( ) ) return nthImage ( item ) ;
if ( this . imageSpareGood. size ( ) > 0 ) return nextSpare ( ) ; // first put out all good spare, but no bad spare
final URIMetadataNode doc = oneResult ( this . imagePageCounter+ + , timeout ) ; // we must use a different counter here because the image counter can be higher when one page filled up several spare
// check if the match was made in the url or in the image links
if ( doc = = null ) {
if ( hasSpare ( ) ) return nextSpare ( ) ;
@ -2231,45 +2218,45 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check image size
final Collection < Object > height = doc . getFieldValues ( CollectionSchema . images_height_val . getSolrFieldName ( ) ) ;
final Collection < Object > width = doc . getFieldValues ( CollectionSchema . images_width_val . getSolrFieldName ( ) ) ;
int h = height = = null ? 0 : ( Integer ) height . iterator ( ) . next ( ) ; // might be -1 for unknown
int w = width = = null ? 0 : ( Integer ) width . iterator ( ) . next ( ) ;
final int h = height = = null ? 0 : ( Integer ) height . iterator ( ) . next ( ) ; // might be -1 for unknown
final int w = width = = null ? 0 : ( Integer ) width . iterator ( ) . next ( ) ;
if ( ( h < = 0 | | h > 16 ) & & ( w < = 0 | | w > 16 ) ) { // we don't want too small images (< 16x16)
if ( ! imageViewed. containsKey ( id ) & & ! containsSpare ( id ) ) imageSpareGood. put ( id , new ImageResult ( doc . url ( ) , doc . url ( ) , doc . mime ( ) , doc . title ( ) , w , h , 0 ) ) ;
if ( ! this . imageViewed. containsKey ( id ) & & ! containsSpare ( id ) ) this . imageSpareGood. put ( id , new ImageResult ( doc . url ( ) , doc . url ( ) , doc . mime ( ) , doc . title ( ) , w , h , 0 ) ) ;
}
}
} else if ( ! strictContentDom ) {
Collection < Object > altO = doc . getFieldValues ( CollectionSchema . images_alt_sxt . getSolrFieldName ( ) ) ;
Collection < Object > imgO = doc . getFieldValues ( CollectionSchema . images_urlstub_sxt . getSolrFieldName ( ) ) ;
final Collection < Object > altO = doc . getFieldValues ( CollectionSchema . images_alt_sxt . getSolrFieldName ( ) ) ;
final Collection < Object > imgO = doc . getFieldValues ( CollectionSchema . images_urlstub_sxt . getSolrFieldName ( ) ) ;
if ( imgO ! = null & & imgO . size ( ) > 0 & & imgO instanceof List < ? > ) {
List < Object > alt = altO = = null ? null : ( List < Object > ) altO ;
List < Object > img = ( List < Object > ) imgO ;
List < String > prt = CollectionConfiguration . indexedList2protocolList ( doc . getFieldValues ( CollectionSchema . images_protocol_sxt . getSolrFieldName ( ) ) , img . size ( ) ) ;
Collection < Object > heightO = doc . getFieldValues ( CollectionSchema . images_height_val . getSolrFieldName ( ) ) ;
Collection < Object > widthO = doc . getFieldValues ( CollectionSchema . images_width_val . getSolrFieldName ( ) ) ;
List < Object > height = heightO = = null ? null : ( List < Object > ) heightO ;
List < Object > width = widthO = = null ? null : ( List < Object > ) widthO ;
final List < Object > alt = altO = = null ? null : ( List < Object > ) altO ;
final List < Object > img = ( List < Object > ) imgO ;
final List < String > prt = CollectionConfiguration . indexedList2protocolList ( doc . getFieldValues ( CollectionSchema . images_protocol_sxt . getSolrFieldName ( ) ) , img . size ( ) ) ;
final Collection < Object > heightO = doc . getFieldValues ( CollectionSchema . images_height_val . getSolrFieldName ( ) ) ;
final Collection < Object > widthO = doc . getFieldValues ( CollectionSchema . images_width_val . getSolrFieldName ( ) ) ;
final List < Object > height = heightO = = null ? null : ( List < Object > ) heightO ;
final List < Object > width = widthO = = null ? null : ( List < Object > ) widthO ;
for ( int c = 0 ; c < img . size ( ) ; c + + ) {
String image_urlstub = ( String ) img . get ( c ) ;
final String image_urlstub = ( String ) img . get ( c ) ;
/ * Icons are not always . ico files and should now be indexed in icons_urlstub_sxt . But this test still makes sense for older indexed documents ,
* or documents coming from previous versions peers * /
if ( image_urlstub . endsWith ( ".ico" ) ) continue ; // we don't want favicons, makes the result look idiotic
try {
int h = height = = null ? 0 : ( Integer ) height . get ( c ) ;
int w = width = = null ? 0 : ( Integer ) width . get ( c ) ;
final int h = height = = null ? 0 : ( Integer ) height . get ( c ) ;
final int w = width = = null ? 0 : ( Integer ) width . get ( c ) ;
// check size good for display (parser may init unknown dimension with -1)
if ( h > 0 & & h < = 16 ) continue ; // to small for display
if ( w > 0 & & w < = 16 ) continue ; // to small for display
DigestURL imageUrl = new DigestURL ( ( prt ! = null & & prt . size ( ) > c ? prt . get ( c ) : "http" ) + "://" + image_urlstub ) ;
String id = ASCII . String ( imageUrl . hash ( ) ) ;
if ( ! imageViewed. containsKey ( id ) & & ! containsSpare ( id ) ) {
String image_alt = ( alt ! = null & & alt . size ( ) > c ) ? ( String ) alt . get ( c ) : "" ;
ImageResult imageResult = new ImageResult ( doc . url ( ) , imageUrl , "" , image_alt , w , h , 0 ) ;
boolean match = ( query. getQueryGoal ( ) . matches ( image_urlstub ) | | query. getQueryGoal ( ) . matches ( image_alt ) ) ;
if ( match ) imageSpareGood. put ( id , imageResult ) ; else imageSpareBad. put ( id , imageResult ) ;
final DigestURL imageUrl = new DigestURL ( ( prt ! = null & & prt . size ( ) > c ? prt . get ( c ) : "http" ) + "://" + image_urlstub ) ;
final String id = ASCII . String ( imageUrl . hash ( ) ) ;
if ( ! this . imageViewed. containsKey ( id ) & & ! containsSpare ( id ) ) {
final String image_alt = ( alt ! = null & & alt . size ( ) > c ) ? ( String ) alt . get ( c ) : "" ;
final ImageResult imageResult = new ImageResult ( doc . url ( ) , imageUrl , "" , image_alt , w , h , 0 ) ;
final boolean match = ( this . query. getQueryGoal ( ) . matches ( image_urlstub ) | | this . query. getQueryGoal ( ) . matches ( image_alt ) ) ;
if ( match ) this . imageSpareGood. put ( id , imageResult ) ; else this . imageSpareBad. put ( id , imageResult ) ;
}
} catch ( MalformedURLException e ) {
} catch ( final MalformedURLException e ) {
continue ;
}
}
@ -2303,7 +2290,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
int i = 0 ;
while ( this . resultList . sizeAvailable ( ) < this . query . neededResults ( ) & & System . currentTimeMillis ( ) < timeout ) {
URIMetadataNode re = oneResult ( i + + , timeout - System . currentTimeMillis ( ) ) ;
final URIMetadataNode re = oneResult ( i + + , timeout - System . currentTimeMillis ( ) ) ;
if ( re = = null ) break ;
}
return this . resultList . list ( Math . min ( this . query . neededResults ( ) , this . resultList . sizeAvailable ( ) ) ) ;
@ -2331,7 +2318,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* because they were not supposed to be here . If really necessary to keep them ,
* growing the maxSize of the resultList should be considered here .
* /
WeakPriorityBlockingQueue . Element < URIMetadataNode > initialLastResult = this . resultList . getLastInQueue ( ) ;
final WeakPriorityBlockingQueue . Element < URIMetadataNode > initialLastResult = this . resultList . getLastInQueue ( ) ;
/ *
* Drain stacks in two steps ( Solr , then RWI ) , because one stack might still