@ -37,12 +37,13 @@ import java.util.TreeSet;
import java.util.concurrent.BlockingQueue ;
import java.util.concurrent.BlockingQueue ;
import java.util.concurrent.ConcurrentHashMap ;
import java.util.concurrent.ConcurrentHashMap ;
import java.util.concurrent.TimeUnit ;
import java.util.concurrent.TimeUnit ;
import java.util.regex.Pattern ;
import net.yacy.cora.document.ASCII ;
import net.yacy.cora.document.ASCII ;
import net.yacy.cora.document.MultiProtocolURI ;
import net.yacy.cora.document.MultiProtocolURI ;
import net.yacy.cora.protocol.Scanner ;
import net.yacy.cora.protocol.Scanner ;
import net.yacy.cora.storage.ConcurrentScoreMap ;
import net.yacy.cora.storage.ClusteredScoreMap ;
import net.yacy.cora.storage.ClusteredScoreMap ;
import net.yacy.cora.storage.ConcurrentScoreMap ;
import net.yacy.cora.storage.ScoreMap ;
import net.yacy.cora.storage.ScoreMap ;
import net.yacy.cora.storage.WeakPriorityBlockingQueue ;
import net.yacy.cora.storage.WeakPriorityBlockingQueue ;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement ;
import net.yacy.cora.storage.WeakPriorityBlockingQueue.ReverseElement ;
@ -55,7 +56,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.ReferenceContainer ;
import net.yacy.kelondro.rwi.ReferenceContainer ;
import net.yacy.kelondro.rwi.TermSearch ;
import net.yacy.kelondro.rwi.TermSearch ;
import net.yacy.kelondro.util.EventTracker ;
import net.yacy.kelondro.util.EventTracker ;
import de.anomic.yacy.graphics.ProfilingGraph ;
import de.anomic.yacy.graphics.ProfilingGraph ;
public final class RankingProcess extends Thread {
public final class RankingProcess extends Thread {
@ -132,16 +132,16 @@ public final class RankingProcess extends Thread {
// sort the local containers and truncate it to a limited count,
// sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be fast
// so following sortings together with the global results will be fast
try {
try {
long timer = System . currentTimeMillis ( ) ;
final long timer = System . currentTimeMillis ( ) ;
final TermSearch < WordReference > search = this . query . getSegment ( ) . termIndex ( ) . query (
final TermSearch < WordReference > search = this . query . getSegment ( ) . termIndex ( ) . query (
query. queryHashes ,
this . query. queryHashes ,
query. excludeHashes ,
this . query. excludeHashes ,
null ,
null ,
Segment . wordReferenceFactory ,
Segment . wordReferenceFactory ,
query. maxDistance ) ;
this . query. maxDistance ) ;
this . localSearchInclusion = search . inclusion ( ) ;
this . localSearchInclusion = search . inclusion ( ) ;
final ReferenceContainer < WordReference > index = search . joined ( ) ;
final ReferenceContainer < WordReference > index = search . joined ( ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( query. id ( true ) , SearchEvent . Type . JOIN , query. queryString , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( this . query. id ( true ) , SearchEvent . Type . JOIN , this . query. queryString , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
if ( index . isEmpty ( ) ) {
if ( index . isEmpty ( ) ) {
return ;
return ;
}
}
@ -157,7 +157,7 @@ public final class RankingProcess extends Thread {
public void add (
public void add (
final ReferenceContainer < WordReference > index ,
final ReferenceContainer < WordReference > index ,
final boolean local ,
final boolean local ,
String resourceName ,
final String resourceName ,
final int fullResource ,
final int fullResource ,
final boolean finalizeAddAtEnd ) {
final boolean finalizeAddAtEnd ) {
// we collect the urlhashes and construct a list with urlEntry objects
// we collect the urlhashes and construct a list with urlEntry objects
@ -180,11 +180,11 @@ public final class RankingProcess extends Thread {
// normalize entries
// normalize entries
final BlockingQueue < WordReferenceVars > decodedEntries = this . order . normalizeWith ( index ) ;
final BlockingQueue < WordReferenceVars > decodedEntries = this . order . normalizeWith ( index ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( query. id ( true ) , SearchEvent . Type . NORMALIZING , resourceName , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( this . query. id ( true ) , SearchEvent . Type . NORMALIZING , resourceName , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
// iterate over normalized entries and select some that are better than currently stored
// iterate over normalized entries and select some that are better than currently stored
timer = System . currentTimeMillis ( ) ;
timer = System . currentTimeMillis ( ) ;
boolean nav_hosts = this . query . navigators . equals ( "all" ) | | this . query . navigators . indexOf ( "hosts" ) > = 0 ;
final boolean nav_hosts = this . query . navigators . equals ( "all" ) | | this . query . navigators . indexOf ( "hosts" ) > = 0 ;
// apply all constraints
// apply all constraints
try {
try {
@ -197,7 +197,7 @@ public final class RankingProcess extends Thread {
// increase flag counts
// increase flag counts
for ( int j = 0 ; j < 32 ; j + + ) {
for ( int j = 0 ; j < 32 ; j + + ) {
if ( iEntry . flags ( ) . get ( j ) ) { flagcount[ j ] + + ; }
if ( iEntry . flags ( ) . get ( j ) ) { this . flagcount[ j ] + + ; }
}
}
// check constraints
// check constraints
@ -206,11 +206,11 @@ public final class RankingProcess extends Thread {
}
}
// check document domain
// check document domain
if ( query. contentdom ! = ContentDomain . TEXT ) {
if ( this . query. contentdom ! = ContentDomain . TEXT ) {
if ( ( query. contentdom = = ContentDomain . AUDIO ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasaudio ) ) ) ) { continue ; }
if ( ( this . query. contentdom = = ContentDomain . AUDIO ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasaudio ) ) ) ) { continue ; }
if ( ( query. contentdom = = ContentDomain . VIDEO ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasvideo ) ) ) ) { continue ; }
if ( ( this . query. contentdom = = ContentDomain . VIDEO ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasvideo ) ) ) ) { continue ; }
if ( ( query. contentdom = = ContentDomain . IMAGE ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasimage ) ) ) ) { continue ; }
if ( ( this . query. contentdom = = ContentDomain . IMAGE ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasimage ) ) ) ) { continue ; }
if ( ( query. contentdom = = ContentDomain . APP ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasapp ) ) ) ) { continue ; }
if ( ( this . query. contentdom = = ContentDomain . APP ) & & ( ! ( iEntry . flags ( ) . get ( Condenser . flag_cat_hasapp ) ) ) ) { continue ; }
}
}
// check tld domain
// check tld domain
@ -226,27 +226,27 @@ public final class RankingProcess extends Thread {
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
// check site constraints
// check site constraints
String hosthash = iEntry . hosthash ( ) ;
final String hosthash = iEntry . hosthash ( ) ;
if ( query. sitehash = = null ) {
if ( this . query. sitehash = = null ) {
// no site constraint there; maybe collect host navigation information
// no site constraint there; maybe collect host navigation information
if ( nav_hosts & & query. urlMask_isCatchall ) {
if ( nav_hosts & & this . query. urlMask_isCatchall ) {
this . hostNavigator . inc ( hosthash ) ;
this . hostNavigator . inc ( hosthash ) ;
this . hostResolver . put ( hosthash , iEntry . urlhash ( ) ) ;
this . hostResolver . put ( hosthash , iEntry . urlhash ( ) ) ;
}
}
} else {
} else {
if ( ! hosthash . equals ( query. sitehash ) ) {
if ( ! hosthash . equals ( this . query. sitehash ) ) {
// filter out all domains that do not match with the site constraint
// filter out all domains that do not match with the site constraint
continue ;
continue ;
}
}
}
}
// finally make a double-check and insert result to stack
// finally make a double-check and insert result to stack
if ( urlhashes. add ( iEntry . urlhash ( ) ) ) {
if ( this . urlhashes. add ( iEntry . urlhash ( ) ) ) {
rankingtryloop : while ( true ) {
rankingtryloop : while ( true ) {
try {
try {
stack. put ( new ReverseElement < WordReferenceVars > ( iEntry , this . order . cardinal ( iEntry ) ) ) ; // inserts the element and removes the worst (which is smallest)
this . stack. put ( new ReverseElement < WordReferenceVars > ( iEntry , this . order . cardinal ( iEntry ) ) ) ; // inserts the element and removes the worst (which is smallest)
break rankingtryloop ;
break rankingtryloop ;
} catch ( ArithmeticException e ) {
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
// this may happen if the concurrent normalizer changes values during cardinal computation
continue rankingtryloop ;
continue rankingtryloop ;
}
}
@ -256,12 +256,12 @@ public final class RankingProcess extends Thread {
}
}
}
}
} catch ( InterruptedException e ) { } finally {
} catch ( final InterruptedException e ) { } finally {
if ( finalizeAddAtEnd ) this . addRunning = false ;
if ( finalizeAddAtEnd ) this . addRunning = false ;
}
}
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( query. id ( true ) , SearchEvent . Type . PRESORT , resourceName , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( this . query. id ( true ) , SearchEvent . Type . PRESORT , resourceName , index . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
}
}
/ * *
/ * *
@ -281,18 +281,18 @@ public final class RankingProcess extends Thread {
}
}
private boolean testFlags ( final WordReference ientry ) {
private boolean testFlags ( final WordReference ientry ) {
if ( query. constraint = = null ) return true ;
if ( this . query. constraint = = null ) return true ;
// test if ientry matches with filter
// test if ientry matches with filter
// if all = true: let only entries pass that has all matching bits
// if all = true: let only entries pass that has all matching bits
// if all = false: let all entries pass that has at least one matching bit
// if all = false: let all entries pass that has at least one matching bit
if ( query. allofconstraint ) {
if ( this . query. allofconstraint ) {
for ( int i = 0 ; i < 32 ; i + + ) {
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ( query. constraint . get ( i ) ) & & ( ! ientry . flags ( ) . get ( i ) ) ) return false ;
if ( ( this . query. constraint . get ( i ) ) & & ( ! ientry . flags ( ) . get ( i ) ) ) return false ;
}
}
return true ;
return true ;
}
}
for ( int i = 0 ; i < 32 ; i + + ) {
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ( query. constraint . get ( i ) ) & & ( ientry . flags ( ) . get ( i ) ) ) return true ;
if ( ( this . query. constraint . get ( i ) ) & & ( ientry . flags ( ) . get ( i ) ) ) return true ;
}
}
return false ;
return false ;
}
}
@ -300,7 +300,7 @@ public final class RankingProcess extends Thread {
protected Map < byte [ ] , ReferenceContainer < WordReference > > searchContainerMap ( ) {
protected Map < byte [ ] , ReferenceContainer < WordReference > > searchContainerMap ( ) {
// direct access to the result maps is needed for abstract generation
// direct access to the result maps is needed for abstract generation
// this is only available if execQuery() was called before
// this is only available if execQuery() was called before
return localSearchInclusion;
return this . localSearchInclusion;
}
}
private WeakPriorityBlockingQueue . Element < WordReferenceVars > takeRWI ( final boolean skipDoubleDom , final long waitingtime ) {
private WeakPriorityBlockingQueue . Element < WordReferenceVars > takeRWI ( final boolean skipDoubleDom , final long waitingtime ) {
@ -313,14 +313,14 @@ public final class RankingProcess extends Thread {
try {
try {
//System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
//System.out.println("stack.poll: feeders = " + this.feeders + ", stack.sizeQueue = " + stack.sizeQueue());
int loops = 0 ; // a loop counter to terminate the reading if all the results are from the same domain
int loops = 0 ; // a loop counter to terminate the reading if all the results are from the same domain
long timeout = System . currentTimeMillis ( ) + waitingtime ;
final long timeout = System . currentTimeMillis ( ) + waitingtime ;
while ( ( ( ! feedingIsFinished ( ) & & this . addRunning ) | | stack. sizeQueue ( ) > 0 ) & &
while ( ( ( ! feedingIsFinished ( ) & & this . addRunning ) | | this . stack. sizeQueue ( ) > 0 ) & &
( this . query . itemsPerPage < 1 | | loops + + < this . query . itemsPerPage ) ) {
( this . query . itemsPerPage < 1 | | loops + + < this . query . itemsPerPage ) ) {
if ( waitingtime < = 0 ) {
if ( waitingtime < = 0 ) {
rwi = stack. poll ( ) ;
rwi = this . stack. poll ( ) ;
} else timeoutloop : while ( System . currentTimeMillis ( ) < timeout ) {
} else timeoutloop : while ( System . currentTimeMillis ( ) < timeout ) {
if ( feedingIsFinished ( ) & & stack. sizeQueue ( ) = = 0 ) break timeoutloop ;
if ( feedingIsFinished ( ) & & this . stack. sizeQueue ( ) = = 0 ) break timeoutloop ;
rwi = stack. poll ( 50 ) ;
rwi = this . stack. poll ( 50 ) ;
if ( rwi ! = null ) break timeoutloop ;
if ( rwi ! = null ) break timeoutloop ;
}
}
if ( rwi = = null ) break ;
if ( rwi = = null ) break ;
@ -335,7 +335,7 @@ public final class RankingProcess extends Thread {
m = this . doubleDomCache . get ( hosthash ) ;
m = this . doubleDomCache . get ( hosthash ) ;
if ( m = = null ) {
if ( m = = null ) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned
// first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue < WordReferenceVars > ( ( query. specialRights ) ? maxDoubleDomSpecial : maxDoubleDomAll ) ;
m = new WeakPriorityBlockingQueue < WordReferenceVars > ( ( this . query. specialRights ) ? maxDoubleDomSpecial : maxDoubleDomAll ) ;
this . doubleDomCache . put ( hosthash , m ) ;
this . doubleDomCache . put ( hosthash , m ) ;
return rwi ;
return rwi ;
}
}
@ -343,7 +343,7 @@ public final class RankingProcess extends Thread {
m . put ( rwi ) ;
m . put ( rwi ) ;
}
}
}
}
} catch ( InterruptedException e1 ) { }
} catch ( final InterruptedException e1 ) { }
if ( this . doubleDomCache . isEmpty ( ) ) return null ;
if ( this . doubleDomCache . isEmpty ( ) ) return null ;
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
@ -355,7 +355,7 @@ public final class RankingProcess extends Thread {
while ( i . hasNext ( ) ) {
while ( i . hasNext ( ) ) {
try {
try {
m = i . next ( ) ;
m = i . next ( ) ;
} catch ( ConcurrentModificationException e ) {
} catch ( final ConcurrentModificationException e ) {
Log . logException ( e ) ;
Log . logException ( e ) ;
continue ; // not the best solution...
continue ; // not the best solution...
}
}
@ -400,7 +400,7 @@ public final class RankingProcess extends Thread {
if ( obrwi = = null ) return null ; // all time was already wasted in takeRWI to get another element
if ( obrwi = = null ) return null ; // all time was already wasted in takeRWI to get another element
final URIMetadataRow page = this . query . getSegment ( ) . urlMetadata ( ) . load ( obrwi ) ;
final URIMetadataRow page = this . query . getSegment ( ) . urlMetadata ( ) . load ( obrwi ) ;
if ( page = = null ) {
if ( page = = null ) {
misses. add ( obrwi . getElement ( ) . urlhash ( ) ) ;
this . misses. add ( obrwi . getElement ( ) . urlhash ( ) ) ;
continue ;
continue ;
}
}
@ -413,9 +413,9 @@ public final class RankingProcess extends Thread {
continue ; // rare case where the url is corrupted
continue ; // rare case where the url is corrupted
}
}
if ( ! query. urlMask_isCatchall ) {
if ( ! this . query. urlMask_isCatchall ) {
// check url mask
// check url mask
if ( ! metadata . matches ( query. urlMask ) ) {
if ( ! metadata . matches ( this . query. urlMask ) ) {
this . sortout + + ;
this . sortout + + ;
continue ;
continue ;
}
}
@ -439,18 +439,18 @@ public final class RankingProcess extends Thread {
final String pagetitle = metadata . dc_title ( ) . toLowerCase ( ) ;
final String pagetitle = metadata . dc_title ( ) . toLowerCase ( ) ;
// check exclusion
// check exclusion
if ( ( QueryParams . anymatch ( pagetitle , query. excludeHashes ) ) | |
if ( ( QueryParams . anymatch ( pagetitle , this . query. excludeHashes ) ) | |
( QueryParams . anymatch ( pageurl . toLowerCase ( ) , query. excludeHashes ) ) | |
( QueryParams . anymatch ( pageurl . toLowerCase ( ) , this . query. excludeHashes ) ) | |
( QueryParams . anymatch ( pageauthor . toLowerCase ( ) , query. excludeHashes ) ) ) {
( QueryParams . anymatch ( pageauthor . toLowerCase ( ) , this . query. excludeHashes ) ) ) {
this . sortout + + ;
this . sortout + + ;
continue ;
continue ;
}
}
// check index-of constraint
// check index-of constraint
if ( ( query. constraint ! = null ) & &
if ( ( this . query. constraint ! = null ) & &
( query. constraint . get ( Condenser . flag_cat_indexof ) ) & &
( this . query. constraint . get ( Condenser . flag_cat_indexof ) ) & &
( ! ( pagetitle . startsWith ( "index of" ) ) ) ) {
( ! ( pagetitle . startsWith ( "index of" ) ) ) ) {
final Iterator < byte [ ] > wi = query. queryHashes . iterator ( ) ;
final Iterator < byte [ ] > wi = this . query. queryHashes . iterator ( ) ;
while ( wi . hasNext ( ) ) {
while ( wi . hasNext ( ) ) {
this . query . getSegment ( ) . termIndex ( ) . removeDelayed ( wi . next ( ) , page . hash ( ) ) ;
this . query . getSegment ( ) . termIndex ( ) . removeDelayed ( wi . next ( ) , page . hash ( ) ) ;
}
}
@ -459,18 +459,18 @@ public final class RankingProcess extends Thread {
}
}
// check location constraint
// check location constraint
if ( ( query. constraint ! = null ) & &
if ( ( this . query. constraint ! = null ) & &
( query. constraint . get ( Condenser . flag_cat_haslocation ) ) & &
( this . query. constraint . get ( Condenser . flag_cat_haslocation ) ) & &
( metadata . lat ( ) = = 0.0f | | metadata . lon ( ) = = 0.0f ) ) {
( metadata . lat ( ) = = 0.0f | | metadata . lon ( ) = = 0.0f ) ) {
this . sortout + + ;
this . sortout + + ;
continue ;
continue ;
}
}
// check content domain
// check content domain
if ( ( query. contentdom = = ContentDomain . AUDIO & & page . laudio ( ) = = 0 ) | |
if ( ( this . query. contentdom = = ContentDomain . AUDIO & & page . laudio ( ) = = 0 ) | |
( query. contentdom = = ContentDomain . VIDEO & & page . lvideo ( ) = = 0 ) | |
( this . query. contentdom = = ContentDomain . VIDEO & & page . lvideo ( ) = = 0 ) | |
( query. contentdom = = ContentDomain . IMAGE & & page . limage ( ) = = 0 ) | |
( this . query. contentdom = = ContentDomain . IMAGE & & page . limage ( ) = = 0 ) | |
( query. contentdom = = ContentDomain . APP & & page . lapp ( ) = = 0 ) ) {
( this . query. contentdom = = ContentDomain . APP & & page . lapp ( ) = = 0 ) ) {
this . sortout + + ;
this . sortout + + ;
continue ;
continue ;
}
}
@ -479,7 +479,7 @@ public final class RankingProcess extends Thread {
// author navigation:
// author navigation:
if ( pageauthor ! = null & & pageauthor . length ( ) > 0 ) {
if ( pageauthor ! = null & & pageauthor . length ( ) > 0 ) {
// add author to the author navigator
// add author to the author navigator
String authorhash = ASCII . String ( Word . word2hash ( pageauthor ) ) ;
final String authorhash = ASCII . String ( Word . word2hash ( pageauthor ) ) ;
// check if we already are filtering for authors
// check if we already are filtering for authors
if ( this . query . authorhash ! = null & & ! this . query . authorhash . equals ( authorhash ) ) {
if ( this . query . authorhash ! = null & & ! this . query . authorhash . equals ( authorhash ) ) {
@ -518,31 +518,31 @@ public final class RankingProcess extends Thread {
}
}
public int sizeQueue ( ) {
public int sizeQueue ( ) {
int c = stack. sizeQueue ( ) ;
int c = this . stack. sizeQueue ( ) ;
for ( WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
for ( final WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
c + = s . sizeQueue ( ) ;
c + = s . sizeQueue ( ) ;
}
}
return c ;
return c ;
}
}
public int sizeAvailable ( ) {
public int sizeAvailable ( ) {
int c = stack. sizeAvailable ( ) ;
int c = this . stack. sizeAvailable ( ) ;
for ( WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
for ( final WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
c + = s . sizeAvailable ( ) ;
c + = s . sizeAvailable ( ) ;
}
}
return c ;
return c ;
}
}
public boolean isEmpty ( ) {
public boolean isEmpty ( ) {
if ( ! stack. isEmpty ( ) ) return false ;
if ( ! this . stack. isEmpty ( ) ) return false ;
for ( WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
for ( final WeakPriorityBlockingQueue < WordReferenceVars > s : this . doubleDomCache . values ( ) ) {
if ( ! s . isEmpty ( ) ) return false ;
if ( ! s . isEmpty ( ) ) return false ;
}
}
return true ;
return true ;
}
}
public int [ ] flagCount ( ) {
public int [ ] flagCount ( ) {
return flagcount;
return this . flagcount;
}
}
// "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
// "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
@ -591,7 +591,7 @@ public final class RankingProcess extends Thread {
}
}
public ScoreMap < String > getHostNavigator ( ) {
public ScoreMap < String > getHostNavigator ( ) {
ScoreMap < String > result = new ConcurrentScoreMap < String > ( ) ;
final ScoreMap < String > result = new ConcurrentScoreMap < String > ( ) ;
if ( ! this . query . navigators . equals ( "all" ) & & this . query . navigators . indexOf ( "hosts" ) < 0 ) return result ;
if ( ! this . query . navigators . equals ( "all" ) & & this . query . navigators . indexOf ( "hosts" ) < 0 ) return result ;
final Iterator < String > domhashs = this . hostNavigator . keys ( false ) ;
final Iterator < String > domhashs = this . hostNavigator . keys ( false ) ;
@ -613,14 +613,14 @@ public final class RankingProcess extends Thread {
}
}
public static final Comparator < Map . Entry < String , Integer > > mecomp = new Comparator < Map . Entry < String , Integer > > ( ) {
public static final Comparator < Map . Entry < String , Integer > > mecomp = new Comparator < Map . Entry < String , Integer > > ( ) {
public int compare ( Map . Entry < String , Integer > o1 , Map . Entry < String , Integer > o2 ) {
public int compare ( final Map . Entry < String , Integer > o1 , final Map . Entry < String , Integer > o2 ) {
if ( o1 . getValue ( ) . intValue ( ) < o2 . getValue ( ) . intValue ( ) ) return 1 ;
if ( o1 . getValue ( ) . intValue ( ) < o2 . getValue ( ) . intValue ( ) ) return 1 ;
if ( o2 . getValue ( ) . intValue ( ) < o1 . getValue ( ) . intValue ( ) ) return - 1 ;
if ( o2 . getValue ( ) . intValue ( ) < o1 . getValue ( ) . intValue ( ) ) return - 1 ;
return 0 ;
return 0 ;
}
}
} ;
} ;
public ScoreMap < String > getTopicNavigator ( int count ) {
public ScoreMap < String > getTopicNavigator ( final int count ) {
// create a list of words that had been computed by statistics over all
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
// words that appeared in the url or the description of all urls
final ScoreMap < String > result = new ConcurrentScoreMap < String > ( ) ;
final ScoreMap < String > result = new ConcurrentScoreMap < String > ( ) ;
@ -645,23 +645,25 @@ public final class RankingProcess extends Thread {
counts . put ( word , q ) ;
counts . put ( word , q ) ;
}
}
}
}
if ( max > min ) for ( Map . Entry < String , Float > ce : counts . entrySet ( ) ) {
if ( max > min ) for ( final Map . Entry < String , Float > ce : counts . entrySet ( ) ) {
result . set ( ce . getKey ( ) , ( int ) ( ( ( double ) count ) * ( ce . getValue ( ) - min ) / ( max - min ) ) ) ;
result . set ( ce . getKey ( ) , ( int ) ( ( ( double ) count ) * ( ce . getValue ( ) - min ) / ( max - min ) ) ) ;
}
}
return this . ref ;
return this . ref ;
}
}
private final static Pattern lettermatch = Pattern . compile ( "[a-z]+" ) ;
public void addTopic ( final String [ ] words ) {
public void addTopic ( final String [ ] words ) {
String word ;
String word ;
for ( final String w : words ) {
for ( final String w : words ) {
word = w . toLowerCase ( ) ;
word = w . toLowerCase ( ) ;
if ( word . length ( ) > 2 & &
if ( word . length ( ) > 2 & &
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off" . indexOf ( word ) < 0 & &
"http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off" . indexOf ( word ) < 0 & &
! query. queryHashes . has ( Word . word2hash ( word ) ) & &
! this . query. queryHashes . has ( Word . word2hash ( word ) ) & &
word. matches ( "[a-z]+" ) & &
lettermatch. matcher ( word) . matches ( ) & &
! Switchboard . badwords . contains ( word ) & &
! Switchboard . badwords . contains ( word ) & &
! Switchboard . stopwords . contains ( word ) ) {
! Switchboard . stopwords . contains ( word ) ) {
ref. inc ( word ) ;
this . ref. inc ( word ) ;
}
}
}
}
}
}