fix for wrong search result counter: added a counter for all filtered out entities

see also http://bugs.yacy.net/view.php?id=5

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7704 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 61c9a791c4
commit 0621a15f89

@ -308,7 +308,7 @@ public final class search {
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
// set statistic details of search result and find best result index set
joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount();
joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount();
prop.put("joincount", Integer.toString(joincount));
if (joincount != 0) {
accu = theSearch.result().completeResults(maxtime);

@ -627,11 +627,12 @@ public class yacysearch {
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
"local-unfiltered(" + theSearch.getRankingResult().getLocalIndexCount() + "), " +
"-local_miss(" + theSearch.getRankingResult().getMissCount() + "), " +
"-local_sortout(" + theSearch.getRankingResult().getSortOutCount() + "), " +
"remote(" + theSearch.getRankingResult().getRemoteResourceSize() + ") links found, " +
(System.currentTimeMillis() - timestamp) + " ms");
// prepare search statistics
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
@ -695,7 +696,7 @@ public class yacysearch {
Log.logException(e);
}
final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
prop.put("num-results_offset", offset);
prop.put("num-results_itemscount", Formatter.number(0, true));
prop.put("num-results_itemsPerPage", itemsPerPage);

@ -92,7 +92,7 @@ public class yacysearchitem {
final QueryParams theQuery = theSearch.getQuery();
// dynamically update count values
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int totalcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
prop.put("offset", offset);
prop.put("itemscount", Formatter.number(Math.min((item < 0) ? theQuery.neededResults() : item + 1, totalcount)));

@ -168,7 +168,7 @@ public class yacysearchtrailer {
String aboutBody = env.getConfig("about.body", "");
String aboutHeadline = env.getConfig("about.headline", "");
if ((aboutBody.length() == 0 && aboutHeadline.length() == 0) ||
theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) {
theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount() == 0) {
prop.put("nav-about", 0);
} else {
prop.put("nav-about", 1);

@ -74,6 +74,7 @@ public final class RankingProcess extends Thread {
private final SortedSet<byte[]> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final SortedSet<byte[]> misses; // contains url-hashes that could not be found in the LURL-DB
private int sortout; // counter for references that had been sorted out for other reasons; exposed via getSortOutCount()
//private final int[] domZones;
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
@ -112,6 +113,7 @@ public final class RankingProcess extends Thread {
//this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
this.misses = new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder);
//this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
this.sortout = 0;
this.flagcount = new int[32];
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
this.hostNavigator = new ConcurrentScoreMap<String>();
@ -208,21 +210,23 @@ public final class RankingProcess extends Thread {
// check constraints
if (!testFlags(iEntry)) {
this.sortout++;
continue;
}
// check document domain
if (query.contentdom != ContentDomain.TEXT) {
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) continue;
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) continue;
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) continue;
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) continue;
if ((query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio)))) { this.sortout++; continue; }
if ((query.contentdom == ContentDomain.VIDEO) && (!(iEntry.flags().get(Condenser.flag_cat_hasvideo)))) { this.sortout++; continue; }
if ((query.contentdom == ContentDomain.IMAGE) && (!(iEntry.flags().get(Condenser.flag_cat_hasimage)))) { this.sortout++; continue; }
if ((query.contentdom == ContentDomain.APP ) && (!(iEntry.flags().get(Condenser.flag_cat_hasapp )))) { this.sortout++; continue; }
}
// check tld domain
/*
if ((DigestURI.domDomain(iEntry.metadataHash()) & this.query.zonecode) == 0) {
// filter out all tld that do not match with wanted tld domain
this.sortout++;
continue;
}
*/
@ -241,6 +245,7 @@ public final class RankingProcess extends Thread {
} else {
if (!domhash.equals(query.sitehash)) {
// filter out all domains that do not match with the site constraint
this.sortout++;
continue;
}
}
@ -409,12 +414,14 @@ public final class RankingProcess extends Thread {
// check errors
if (metadata == null) {
this.sortout++;
continue; // rare case where the url is corrupted
}
if (!query.urlMask_isCatchall) {
// check url mask
if (!metadata.matches(query.urlMask)) {
this.sortout++;
continue;
}
@ -428,6 +435,7 @@ public final class RankingProcess extends Thread {
// check for more errors
if (metadata.url() == null) {
this.sortout++;
continue; // rare case where the url is corrupted
}
@ -439,6 +447,7 @@ public final class RankingProcess extends Thread {
if ((QueryParams.anymatch(pagetitle, query.excludeHashes)) ||
(QueryParams.anymatch(pageurl.toLowerCase(), query.excludeHashes)) ||
(QueryParams.anymatch(pageauthor.toLowerCase(), query.excludeHashes))) {
this.sortout++;
continue;
}
@ -450,6 +459,7 @@ public final class RankingProcess extends Thread {
while (wi.hasNext()) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
}
this.sortout++;
continue;
}
@ -457,6 +467,7 @@ public final class RankingProcess extends Thread {
if ((query.constraint != null) &&
(query.constraint.get(Condenser.flag_cat_haslocation)) &&
(metadata.lat() == 0.0f || metadata.lon() == 0.0f)) {
this.sortout++;
continue;
}
@ -465,6 +476,7 @@ public final class RankingProcess extends Thread {
(query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0) ||
(query.contentdom == ContentDomain.IMAGE && page.limage() == 0) ||
(query.contentdom == ContentDomain.APP && page.lapp() == 0)) {
this.sortout++;
continue;
}
@ -476,12 +488,14 @@ public final class RankingProcess extends Thread {
// check if we already are filtering for authors
if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) {
this.sortout++;
continue;
}
// add author to the author navigator
this.authorNavigator.inc(pageauthor);
} else if (this.query.authorhash != null) {
this.sortout++;
continue;
}
@ -498,6 +512,7 @@ public final class RankingProcess extends Thread {
// check Scanner
if (!Scanner.acceptURL(metadata.url())) {
this.sortout++;
continue;
}
@ -570,6 +585,10 @@ public final class RankingProcess extends Thread {
return this.misses.size();
}
/**
 * Returns the current value of the {@code sortout} counter.
 * <p>
 * The counter is incremented each time an entry is skipped by one of the
 * filter checks in this class (the places that do {@code this.sortout++}
 * immediately before {@code continue}).
 * <p>
 * NOTE(review): {@code sortout} is a plain {@code int} field of a class that
 * extends {@code Thread}, and it is read here without synchronization --
 * confirm whether callers on other threads need an up-to-date value.
 *
 * @return the number of entries counted as sorted out so far
 */
public int getSortOutCount() {
return this.sortout;
}
public ScoreMap<String> getNamespaceNavigator() {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ClusteredScoreMap<String>();
if (this.namespaceNavigator.sizeSmaller(2)) this.namespaceNavigator.clear(); // navigators with one entry are not useful

Loading…
Cancel
Save