@ -29,7 +29,6 @@ package de.anomic.plasma;
import java.io.File ;
import java.io.File ;
import java.io.IOException ;
import java.io.IOException ;
import java.util.HashMap ;
import java.util.HashMap ;
import java.util.HashSet ;
import java.util.Iterator ;
import java.util.Iterator ;
import java.util.Map ;
import java.util.Map ;
import java.util.Set ;
import java.util.Set ;
@ -40,34 +39,45 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexContainer ;
import de.anomic.index.indexContainer ;
import de.anomic.index.indexRWIEntry ;
import de.anomic.index.indexRWIEntry ;
import de.anomic.index.indexRWIEntryOrder ;
import de.anomic.index.indexRWIEntryOrder ;
import de.anomic.index.indexURLEntry ;
import de.anomic.kelondro.kelondroBinSearch ;
import de.anomic.kelondro.kelondroBinSearch ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.server.serverCodings ;
import de.anomic.server.serverCodings ;
import de.anomic.server.serverFileUtils ;
import de.anomic.server.serverFileUtils ;
import de.anomic.server.serverProfiling ;
import de.anomic.server.serverProfiling ;
import de.anomic.yacy.yacyURL ;
public final class plasmaSearchRankingProcess {
public final class plasmaSearchRankingProcess {
public static kelondroBinSearch [ ] ybrTables = null ; // block-rank tables
public static kelondroBinSearch [ ] ybrTables = null ; // block-rank tables
private static boolean useYBR = true ;
private static boolean useYBR = true ;
private TreeMap pageAcc ; // key = ranking (Long); value = indexRWIEntry
private TreeMap sortedRWIEntries ; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
private HashMap doubleDomCache ; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
private HashMap handover ; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private plasmaSearchQuery query ;
private plasmaSearchQuery query ;
private plasmaSearchRankingProfile ranking ;
private plasmaSearchRankingProfile ranking ;
private int sortorder ;
private int filteredCount ;
private int filteredCount ;
private indexRWIEntryOrder order ;
private serverProfiling process ;
private int maxentries ;
private int maxentries ;
private int globalcount ;
private int globalcount ;
private indexRWIEntryOrder order ;
private serverProfiling process ;
private HashMap urlhashes ; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private HashMap urlhashes ; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private kelondroMScoreCluster ref ; // reference score computation for the commonSense heuristic
private kelondroMScoreCluster ref ; // reference score computation for the commonSense heuristic
private int [ ] c ; // flag counter
private int [ ] flagcount ; // flag counter
private TreeSet misses ; // contains url-hashes that could not been found in the LURL-DB
private plasmaWordIndex wordIndex ;
private Map [ ] localSearchContainerMaps ;
public plasmaSearchRankingProcess ( plasmaSearchQuery query , serverProfiling process , plasmaSearchRankingProfile ranking , int maxentries ) {
public plasmaSearchRankingProcess ( plasma WordIndex wordIndex , plasma SearchQuery query , serverProfiling process , plasmaSearchRankingProfile ranking , int sortorder , int maxentries ) {
// we collect the urlhashes and construct a list with urlEntry objects
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// attention: if minEntries is too high, this method will not terminate within the maxTime
this . pageAcc = new TreeMap ( ) ;
// sortorder: 0 = hash, 1 = url, 2 = ranking
this . localSearchContainerMaps = null ;
this . sortedRWIEntries = new TreeMap ( ) ;
this . doubleDomCache = new HashMap ( ) ;
this . handover = new HashMap ( ) ;
this . filteredCount = 0 ;
this . process = process ;
this . process = process ;
this . order = null ;
this . order = null ;
this . query = query ;
this . query = query ;
@ -76,8 +86,80 @@ public final class plasmaSearchRankingProcess {
this . globalcount = 0 ;
this . globalcount = 0 ;
this . urlhashes = new HashMap ( ) ;
this . urlhashes = new HashMap ( ) ;
this . ref = new kelondroMScoreCluster ( ) ;
this . ref = new kelondroMScoreCluster ( ) ;
c = new int [ 32 ] ;
this . misses = new TreeSet ( ) ;
for ( int i = 0 ; i < 32 ; i + + ) { c [ i ] = 0 ; }
this . wordIndex = wordIndex ;
this . sortorder = sortorder ;
this . flagcount = new int [ 32 ] ;
for ( int i = 0 ; i < 32 ; i + + ) { this . flagcount [ i ] = 0 ; }
}
public void execQuery ( boolean fetchURLs ) {
if ( process ! = null ) process . startTimer ( ) ;
this . localSearchContainerMaps = wordIndex . localSearchContainers ( query , null ) ;
if ( process ! = null ) process . yield ( plasmaSearchEvent . COLLECTION , this . localSearchContainerMaps [ 0 ] . size ( ) ) ;
// join and exlcude the local result
if ( process ! = null ) process . startTimer ( ) ;
indexContainer index =
( this . localSearchContainerMaps = = null ) ?
plasmaWordIndex . emptyContainer ( null , 0 ) :
indexContainer . joinExcludeContainers (
this . localSearchContainerMaps [ 0 ] . values ( ) ,
this . localSearchContainerMaps [ 1 ] . values ( ) ,
query . maxDistance ) ;
if ( process ! = null ) process . yield ( plasmaSearchEvent . JOIN , index . size ( ) ) ;
int joincount = index . size ( ) ;
if ( ( index = = null ) | | ( joincount = = 0 ) ) {
return ;
}
if ( sortorder = = 2 ) {
insert ( index , true ) ;
} else {
final Iterator en = index . entries ( ) ;
// generate a new map where the urls are sorted (not by hash but by the url text)
indexRWIEntry ientry ;
indexURLEntry uentry ;
String u ;
loop : while ( en . hasNext ( ) ) {
ientry = ( indexRWIEntry ) en . next ( ) ;
// check constraints
if ( ! testFlags ( ientry ) ) continue loop ;
// increase flag counts
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ientry . flags ( ) . get ( i ) ) { flagcount [ i ] + + ; }
}
// load url
if ( sortorder = = 0 ) {
this . sortedRWIEntries . put ( ientry . urlHash ( ) , ientry ) ;
this . urlhashes . put ( ientry . urlHash ( ) , ientry . urlHash ( ) ) ;
filteredCount + + ;
} else {
if ( fetchURLs ) {
uentry = wordIndex . loadedURL . load ( ientry . urlHash ( ) , ientry , 0 ) ;
if ( uentry = = null ) {
this . misses . add ( ientry . urlHash ( ) ) ;
} else {
u = uentry . comp ( ) . url ( ) . toNormalform ( false , true ) ;
this . sortedRWIEntries . put ( u , ientry ) ;
this . urlhashes . put ( ientry . urlHash ( ) , u ) ;
filteredCount + + ;
}
} else {
filteredCount + + ;
}
}
// interrupt if we have enough
if ( ( query . neededResults ( ) > 0 ) & & ( this . misses . size ( ) + this . sortedRWIEntries . size ( ) > query . neededResults ( ) ) ) break loop ;
} // end loop
}
}
}
public void insert ( indexContainer container , boolean local ) {
public void insert ( indexContainer container , boolean local ) {
@ -102,7 +184,6 @@ public final class plasmaSearchRankingProcess {
// normalize entries and get ranking
// normalize entries and get ranking
if ( process ! = null ) process . startTimer ( ) ;
if ( process ! = null ) process . startTimer ( ) ;
Iterator i = container . entries ( ) ;
Iterator i = container . entries ( ) ;
this . pageAcc = new TreeMap ( ) ;
indexRWIEntry iEntry , l ;
indexRWIEntry iEntry , l ;
long biggestEntry = 0 ;
long biggestEntry = 0 ;
//long s0 = System.currentTimeMillis();
//long s0 = System.currentTimeMillis();
@ -113,89 +194,164 @@ public final class plasmaSearchRankingProcess {
// increase flag counts
// increase flag counts
for ( int j = 0 ; j < 32 ; j + + ) {
for ( int j = 0 ; j < 32 ; j + + ) {
if ( iEntry . flags ( ) . get ( j ) ) { c[ j ] + + ; }
if ( iEntry . flags ( ) . get ( j ) ) { flag count [ j ] + + ; }
}
}
// kick out entries that are too bad according to current findings
// kick out entries that are too bad according to current findings
r = new Long ( order . cardinal ( iEntry ) ) ;
r = new Long ( order . cardinal ( iEntry ) ) ;
if ( ( maxentries > = 0 ) & & ( pageAcc . size ( ) > = maxentries ) & & ( r . longValue ( ) > biggestEntry ) ) continue ;
if ( ( maxentries > = 0 ) & & ( sortedRWIEntries . size ( ) > = maxentries ) & & ( r . longValue ( ) > biggestEntry ) ) continue ;
// check constraints
// check constraints
if ( ( ! ( query . constraint . equals ( plasmaSearchQuery . catchall_constraint ) ) ) & & ( ! ( iEntry . flags ( ) . allOf ( query . constraint ) ) ) ) continue ; // filter out entries that do not match the search constraint
if ( ! testFlags ( iEntry ) ) continue ;
if ( query . contentdom ! = plasmaSearchQuery . CONTENTDOM_TEXT ) {
if ( query . contentdom ! = plasmaSearchQuery . CONTENTDOM_TEXT ) {
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_AUDIO ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasaudio ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_AUDIO ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasaudio ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_VIDEO ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasvideo ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_VIDEO ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasvideo ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_IMAGE ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasimage ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_IMAGE ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasimage ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_APP ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasapp ) ) ) ) continue ;
if ( ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_APP ) & & ( ! ( iEntry . flags ( ) . get ( plasmaCondenser . flag_cat_hasapp ) ) ) ) continue ;
}
}
if ( ( maxentries < 0 ) | | ( pageAcc . size ( ) < maxentries ) ) {
if ( ( maxentries < 0 ) | | ( sortedRWIEntries . size ( ) < maxentries ) ) {
if ( urlhashes . containsKey ( iEntry . urlHash ( ) ) ) continue ;
if ( urlhashes . containsKey ( iEntry . urlHash ( ) ) ) continue ;
while ( pageAcc . containsKey ( r ) ) r = new Long ( r . longValue ( ) + 1 ) ;
while ( sortedRWIEntries . containsKey ( r ) ) r = new Long ( r . longValue ( ) + 1 ) ;
pageAcc . put ( r , iEntry ) ;
sortedRWIEntries . put ( r , iEntry ) ;
} else {
} else {
if ( r . longValue ( ) > biggestEntry ) {
if ( r . longValue ( ) > biggestEntry ) {
continue ;
continue ;
} else {
} else {
if ( urlhashes . containsKey ( iEntry . urlHash ( ) ) ) continue ;
if ( urlhashes . containsKey ( iEntry . urlHash ( ) ) ) continue ;
l = ( indexRWIEntry ) pageAcc. remove ( ( Long ) pageAcc . lastKey ( ) ) ;
l = ( indexRWIEntry ) sortedRWIEntries. remove ( ( Long ) sortedRWIEntries . lastKey ( ) ) ;
urlhashes . remove ( l . urlHash ( ) ) ;
urlhashes . remove ( l . urlHash ( ) ) ;
while ( pageAcc . containsKey ( r ) ) r = new Long ( r . longValue ( ) + 1 ) ;
while ( sortedRWIEntries . containsKey ( r ) ) r = new Long ( r . longValue ( ) + 1 ) ;
pageAcc . put ( r , iEntry ) ;
sortedRWIEntries . put ( r , iEntry ) ;
biggestEntry = order . cardinal ( ( indexRWIEntry ) pageAcc. get ( pageAcc . lastKey ( ) ) ) ;
biggestEntry = order . cardinal ( ( indexRWIEntry ) sortedRWIEntries. get ( sortedRWIEntries . lastKey ( ) ) ) ;
}
}
}
}
urlhashes . put ( iEntry . urlHash ( ) , r ) ;
// increase counter for statistics
// increase counter for statistics
if ( ! local ) this . globalcount + + ;
if ( ! local ) this . globalcount + + ;
}
}
this . filteredCount = pageAcc . size ( ) ;
this . filteredCount = sortedRWIEntries . size ( ) ;
//long sc = Math.max(1, System.currentTimeMillis() - s0);
//long sc = Math.max(1, System.currentTimeMillis() - s0);
//System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc);
//System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc);
if ( container . size ( ) > query . neededResults ( ) ) remove ( true , true ) ;
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
if ( process ! = null ) process . yield ( plasmaSearchEvent . PRESORT , container . size ( ) ) ;
if ( process ! = null ) process . yield ( plasmaSearchEvent . PRESORT , container . size ( ) ) ;
}
}
public class rIterator implements Iterator {
boolean urls ;
Iterator r ;
plasmaWordIndex wi ;
public rIterator ( plasmaWordIndex wi , boolean fetchURLs ) {
// if fetchURLs == true, this iterates indexURLEntry objects, otherwise it iterates indexRWIEntry objects
this . urls = fetchURLs ;
this . r = pageAcc . entrySet ( ) . iterator ( ) ;
this . wi = wi ;
}
public boolean hasNext ( ) {
return r . hasNext ( ) ;
}
public Object next ( ) {
Map . Entry entry = ( Map . Entry ) r . next ( ) ;
indexRWIEntry ientry = ( indexRWIEntry ) entry . getValue ( ) ;
if ( urls ) {
return wi . loadedURL . load ( ientry . urlHash ( ) , ientry , ( ( Long ) entry . getKey ( ) ) . longValue ( ) ) ;
} else {
return ientry ;
}
}
public void remove ( ) {
private boolean testFlags ( indexRWIEntry ientry ) {
throw new UnsupportedOperationException ( ) ;
if ( query . constraint = = null ) return true ;
}
// test if ientry matches with filter
// if all = true: let only entries pass that has all matching bits
// if all = false: let all entries pass that has at least one matching bit
if ( query . allofconstraint ) {
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ( query . constraint . get ( i ) ) & & ( ! ientry . flags ( ) . get ( i ) ) ) return false ;
}
return true ;
}
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ( query . constraint . get ( i ) ) & & ( ientry . flags ( ) . get ( i ) ) ) return true ;
}
return false ;
}
}
public int size ( ) {
public synchronized Map [ ] searchContainerMaps ( ) {
assert pageAcc . size ( ) = = urlhashes . size ( ) ;
// direct access to the result maps is needed for abstract generation
return pageAcc . size ( ) ;
// this is only available if execQuery() was called before
return localSearchContainerMaps ;
}
// todo:
// - remove redundant urls (sub-path occurred before)
// - move up shorter urls
// - root-domain guessing to prefer the root domain over other urls if search word appears in domain name
private synchronized Object [ ] /*{Object, indexRWIEntry}*/ bestRWI ( boolean skipDoubleDom ) {
// returns from the current RWI list the best entry and removed this entry from the list
Object bestEntry ;
TreeMap m ;
indexRWIEntry rwi ;
while ( sortedRWIEntries . size ( ) > 0 ) {
bestEntry = sortedRWIEntries . firstKey ( ) ;
rwi = ( indexRWIEntry ) sortedRWIEntries . remove ( bestEntry ) ;
if ( ! skipDoubleDom ) return new Object [ ] { bestEntry , rwi } ;
// check doubledom
String domhash = rwi . urlHash ( ) . substring ( 6 ) ;
m = ( TreeMap ) this . doubleDomCache . get ( domhash ) ;
if ( m = = null ) {
// first appearance of dom
m = new TreeMap ( ) ;
this . doubleDomCache . put ( domhash , m ) ;
return new Object [ ] { bestEntry , rwi } ;
}
// second appearances of dom
m . put ( bestEntry , rwi ) ;
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
Iterator i = this . doubleDomCache . values ( ) . iterator ( ) ;
bestEntry = null ;
Object o ;
indexRWIEntry bestrwi = null ;
while ( i . hasNext ( ) ) {
m = ( TreeMap ) i . next ( ) ;
if ( m . size ( ) = = 0 ) continue ;
if ( bestEntry = = null ) {
bestEntry = m . firstKey ( ) ;
bestrwi = ( indexRWIEntry ) m . remove ( bestEntry ) ;
continue ;
}
o = m . firstKey ( ) ;
rwi = ( indexRWIEntry ) m . remove ( o ) ;
if ( o instanceof Long ) {
if ( ( ( Long ) o ) . longValue ( ) < ( ( Long ) bestEntry ) . longValue ( ) ) {
bestEntry = o ;
bestrwi = rwi ;
}
}
if ( o instanceof String ) {
if ( ( ( String ) o ) . compareTo ( ( String ) bestEntry ) < 0 ) {
bestEntry = o ;
bestrwi = rwi ;
}
}
}
if ( bestrwi = = null ) return null ;
// finally remove the best entry from the doubledom cache
m = ( TreeMap ) this . doubleDomCache . get ( bestrwi . urlHash ( ) . substring ( 6 ) ) ;
m . remove ( bestEntry ) ;
return new Object [ ] { bestEntry , bestrwi } ;
}
public synchronized indexURLEntry bestURL ( boolean skipDoubleDom ) {
// returns from the current RWI list the best URL entry and removed this entry from the list
while ( ( sortedRWIEntries . size ( ) > 0 ) | | ( size ( ) > 0 ) ) {
Object [ ] obrwi = bestRWI ( skipDoubleDom ) ;
Object bestEntry = obrwi [ 0 ] ;
indexRWIEntry ientry = ( indexRWIEntry ) obrwi [ 1 ] ;
long ranking = ( bestEntry instanceof Long ) ? ( ( Long ) bestEntry ) . longValue ( ) : 0 ;
indexURLEntry u = wordIndex . loadedURL . load ( ientry . urlHash ( ) , ientry , ranking ) ;
if ( u ! = null ) {
this . handover . put ( u . hash ( ) , u . comp ( ) . url ( ) . toNormalform ( true , false ) ) ; // remember that we handed over this url
return u ;
}
misses . add ( ientry . urlHash ( ) ) ;
}
return null ;
}
public synchronized int size ( ) {
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
int c = sortedRWIEntries . size ( ) ;
Iterator i = this . doubleDomCache . values ( ) . iterator ( ) ;
while ( i . hasNext ( ) ) c + = ( ( TreeMap ) i . next ( ) ) . size ( ) ;
return c ;
}
}
public int [ ] flagCount ( ) {
public int [ ] flagCount ( ) {
return c ;
return flag count ;
}
}
public int filteredCount ( ) {
public int filteredCount ( ) {
@ -207,17 +363,16 @@ public final class plasmaSearchRankingProcess {
}
}
public indexRWIEntry remove ( String urlHash ) {
public indexRWIEntry remove ( String urlHash ) {
Long r = ( Long ) urlhashes . get ( urlHash ) ;
Object r = ( Long ) urlhashes . get ( urlHash ) ;
if ( r = = null ) return null ;
if ( r = = null ) return null ;
assert pageAcc . containsKey ( r ) ;
assert sortedRWIEntries . containsKey ( r ) ;
indexRWIEntry iEntry = ( indexRWIEntry ) pageAcc . remove ( r ) ;
indexRWIEntry iEntry = ( indexRWIEntry ) sortedRWIEntries . remove ( r ) ;
urlhashes . remove ( urlHash ) ;
urlhashes . remove ( urlHash ) ;
return iEntry ;
return iEntry ;
}
}
public Iterator entries ( plasmaWordIndex wi , boolean fetchURLs ) {
public Iterator miss ( ) {
// if fetchURLs == true, this iterates indexURLEntry objects, otherwise it iterates indexRWIEntry objects
return this . misses . iterator ( ) ;
return new rIterator ( wi , fetchURLs ) ;
}
}
public Set getReferences ( int count ) {
public Set getReferences ( int count ) {
@ -257,35 +412,6 @@ public final class plasmaSearchRankingProcess {
return this . order ;
return this . order ;
}
}
private void remove ( boolean rootDomExt , boolean doubleDom ) {
// this removes all refererences to urls that are extended paths of existing 'RootDom'-urls
if ( pageAcc . size ( ) < = query . neededResults ( ) ) return ;
HashSet rootDoms = new HashSet ( ) ;
HashSet doubleDoms = new HashSet ( ) ;
Iterator i = pageAcc . entrySet ( ) . iterator ( ) ;
Map . Entry entry ;
indexRWIEntry iEntry ;
String hashpart ;
boolean isWordRootURL ;
TreeSet querywords = plasmaSearchQuery . cleanQuery ( query . queryString ( ) ) [ 0 ] ;
while ( i . hasNext ( ) ) {
if ( pageAcc . size ( ) < = query . neededResults ( ) ) break ;
entry = ( Map . Entry ) i . next ( ) ;
iEntry = ( indexRWIEntry ) entry . getValue ( ) ;
hashpart = iEntry . urlHash ( ) . substring ( 6 ) ;
isWordRootURL = yacyURL . isWordRootURL ( iEntry . urlHash ( ) , querywords ) ;
if ( isWordRootURL ) {
rootDoms . add ( hashpart ) ;
} else {
if ( ( ( rootDomExt ) & & ( rootDoms . contains ( hashpart ) ) ) | |
( ( doubleDom ) & & ( doubleDoms . contains ( hashpart ) ) ) ) {
i . remove ( ) ;
}
}
doubleDoms . add ( hashpart ) ;
}
}
public static void loadYBR ( File rankingPath , int count ) {
public static void loadYBR ( File rankingPath , int count ) {
// load ranking tables
// load ranking tables
if ( rankingPath . exists ( ) ) {
if ( rankingPath . exists ( ) ) {
@ -337,4 +463,45 @@ public final class plasmaSearchRankingProcess {
return 15 ;
return 15 ;
}
}
public long postRanking (
Set topwords ,
plasmaSearchEvent . ResultEntry rentry ,
int position ) {
long r = ( 255 - position ) < < 8 ;
// for media search: prefer pages with many links
if ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_IMAGE ) r + = rentry . limage ( ) < < ranking . coeff_cathasimage ;
if ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_AUDIO ) r + = rentry . laudio ( ) < < ranking . coeff_cathasaudio ;
if ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_VIDEO ) r + = rentry . lvideo ( ) < < ranking . coeff_cathasvideo ;
if ( query . contentdom = = plasmaSearchQuery . CONTENTDOM_APP ) r + = rentry . lapp ( ) < < ranking . coeff_cathasapp ;
// prefer hit with 'prefer' pattern
if ( rentry . url ( ) . toNormalform ( true , true ) . matches ( query . prefer ) ) r + = 256 < < ranking . coeff_prefer ;
if ( rentry . title ( ) . matches ( query . prefer ) ) r + = 256 < < ranking . coeff_prefer ;
// apply 'common-sense' heuristic using references
String urlstring = rentry . url ( ) . toNormalform ( true , true ) ;
String [ ] urlcomps = htmlFilterContentScraper . urlComps ( urlstring ) ;
String [ ] descrcomps = rentry . title ( ) . toLowerCase ( ) . split ( htmlFilterContentScraper . splitrex ) ;
for ( int j = 0 ; j < urlcomps . length ; j + + ) {
if ( topwords . contains ( urlcomps [ j ] ) ) r + = Math . max ( 1 , 256 - urlstring . length ( ) ) < < ranking . coeff_urlcompintoplist ;
}
for ( int j = 0 ; j < descrcomps . length ; j + + ) {
if ( topwords . contains ( descrcomps [ j ] ) ) r + = Math . max ( 1 , 256 - rentry . title ( ) . length ( ) ) < < ranking . coeff_descrcompintoplist ;
}
// apply query-in-result matching
Set urlcomph = plasmaCondenser . words2hashSet ( urlcomps ) ;
Set descrcomph = plasmaCondenser . words2hashSet ( descrcomps ) ;
Iterator shi = query . queryHashes . iterator ( ) ;
String queryhash ;
while ( shi . hasNext ( ) ) {
queryhash = ( String ) shi . next ( ) ;
if ( urlcomph . contains ( queryhash ) ) r + = 256 < < ranking . coeff_appurl ;
if ( descrcomph . contains ( queryhash ) ) r + = 256 < < ranking . coeff_appdescr ;
}
return r ;
}
}
}