diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index f9ffe683d..4fe5cc154 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -308,7 +308,7 @@ public final class search { theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0)); // set statistic details of search result and find best result index set - joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount(); + joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount(); prop.put("joincount", Integer.toString(joincount)); if (joincount != 0) { accu = theSearch.result().completeResults(maxtime); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index ec03defb4..ca3d3f0c5 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -627,11 +627,12 @@ public class yacysearch { Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " + "local-unfiltered(" + theSearch.getRankingResult().getLocalIndexCount() + "), " + "-local_miss(" + theSearch.getRankingResult().getMissCount() + "), " + + "-local_sortout(" + theSearch.getRankingResult().getSortOutCount() + "), " + "remote(" + theSearch.getRankingResult().getRemoteResourceSize() + ") links found, " + (System.currentTimeMillis() - timestamp) + " ms"); // prepare search statistics - theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount(); + theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + 
theSearch.getRankingResult().getRemoteIndexCount(); theQuery.searchtime = System.currentTimeMillis() - timestamp; theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime(); theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime(); @@ -695,7 +696,7 @@ public class yacysearch { Log.logException(e); } - final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount(); + final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount(); prop.put("num-results_offset", offset); prop.put("num-results_itemscount", Formatter.number(0, true)); prop.put("num-results_itemsPerPage", itemsPerPage); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index d863386a0..50580a96e 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -92,7 +92,7 @@ public class yacysearchitem { final QueryParams theQuery = theSearch.getQuery(); // dynamically update count values - final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount(); + final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount(); final int offset = theQuery.neededResults() - theQuery.displayResults() + 1; prop.put("offset", offset); prop.put("itemscount", Formatter.number(Math.min((item < 0) ? 
theQuery.neededResults() : item + 1, totalcount))); diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 42f2ccb05..c1d75aa01 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -168,7 +168,7 @@ public class yacysearchtrailer { String aboutBody = env.getConfig("about.body", ""); String aboutHeadline = env.getConfig("about.headline", ""); if ((aboutBody.length() == 0 && aboutHeadline.length() == 0) || - theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) { + theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) { prop.put("nav-about", 0); } else { prop.put("nav-about", 1); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 26f7e78ea..50efe3a00 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -74,6 +74,7 @@ public final class RankingProcess extends Thread { private final SortedSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final int[] flagcount; // flag counter private final SortedSet misses; // contains url-hashes that could not been found in the LURL-DB + private int sortout; // counter for references that had been sorted out for other reasons //private final int[] domZones; private SortedMap> localSearchInclusion; @@ -112,6 +113,7 @@ public final class RankingProcess extends Thread { //this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); this.misses = new TreeSet(URIMetadataRow.rowdef.objectOrder); //this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0); + 
this.sortout = 0; this.flagcount = new int[32]; for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} this.hostNavigator = new ConcurrentScoreMap(); @@ -208,21 +210,23 @@ public final class RankingProcess extends Thread { // check constraints if (!testFlags(iEntry)) { + this.sortout++; continue; } // check document domain if (query.contentdom != ContentDomain.TEXT) { - if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue; - if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue; - if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue; - if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue; + if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { this.sortout++; continue; } + if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { this.sortout++; continue; } + if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { this.sortout++; continue; } + if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { this.sortout++; continue; } } // check tld domain /* if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) { // filter out all tld that do not match with wanted tld domain + this.sortout++; continue; } */ @@ -241,6 +245,7 @@ public final class RankingProcess extends Thread { } else { if (!domhash.equals(query.sitehash)) { // filter out all domains that do not match with the site constraint + this.sortout++; continue; } } @@ -409,12 +414,14 @@ public final class RankingProcess extends Thread { // check errors if (metadata == null) { + this.sortout++; continue; // rare case where the url is corrupted } if (!query.urlMask_isCatchall) 
{ // check url mask if (!metadata.matches(query.urlMask)) { + this.sortout++; continue; } @@ -428,6 +435,7 @@ public final class RankingProcess extends Thread { // check for more errors if (metadata.url() == null) { + this.sortout++; continue; // rare case where the url is corrupted } @@ -439,6 +447,7 @@ public final class RankingProcess extends Thread { if ((QueryParams.anymatch(pagetitle, query.excludeHashes)) || (QueryParams.anymatch(pageurl.toLowerCase(), query.excludeHashes)) || (QueryParams.anymatch(pageauthor.toLowerCase(), query.excludeHashes))) { + this.sortout++; continue; } @@ -450,6 +459,7 @@ public final class RankingProcess extends Thread { while (wi.hasNext()) { this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash()); } + this.sortout++; continue; } @@ -457,6 +467,7 @@ public final class RankingProcess extends Thread { if ((query.constraint != null) && (query.constraint.get(Condenser.flag_cat_haslocation)) && (metadata.lat() == 0.0f || metadata.lon() == 0.0f)) { + this.sortout++; continue; } @@ -465,6 +476,7 @@ public final class RankingProcess extends Thread { (query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) || (query.contentdom == ContentDomain.IMAGE && page.limage() == 0) || (query.contentdom == ContentDomain.APP && page.lapp() == 0)) { + this.sortout++; continue; } @@ -476,12 +488,14 @@ public final class RankingProcess extends Thread { // check if we already are filtering for authors if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { + this.sortout++; continue; } // add author to the author navigator this.authorNavigator.inc(pageauthor); } else if (this.query.authorhash != null) { + this.sortout++; continue; } @@ -498,6 +512,7 @@ public final class RankingProcess extends Thread { // check Scanner if (!Scanner.acceptURL(metadata.url())) { + this.sortout++; continue; } @@ -570,6 +585,10 @@ public final class RankingProcess extends Thread { return this.misses.size(); } + public int 
getSortOutCount() { + return this.sortout; + } + public ScoreMap getNamespaceNavigator() { if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ClusteredScoreMap(); if (this.namespaceNavigator.sizeSmaller(2)) this.namespaceNavigator.clear(); // navigators with one entry are not useful