From d74472f5625ff097e7541e1a56156cbe487b2651 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 27 Feb 2013 22:40:23 +0100 Subject: [PATCH] corrected result counter --- htroot/IndexControlRWIs_p.java | 9 -- htroot/yacysearch.java | 17 ++-- htroot/yacysearchitem.java | 7 +- htroot/yacysearchlatestinfo.java | 7 +- .../sorting/WeakPriorityBlockingQueue.java | 24 ++++- .../citation/CitationReferenceFactory.java | 2 +- .../kelondro/data/meta/URIMetadataNode.java | 18 ++-- .../kelondro/data/meta/URIMetadataRow.java | 2 +- .../data/word/WordReferenceFactory.java | 4 +- .../kelondro/data/word/WordReferenceVars.java | 32 ++++-- .../yacy/kelondro/rwi/ReferenceContainer.java | 6 +- .../yacy/kelondro/rwi/ReferenceFactory.java | 2 +- .../peers/graphics/WebStructureGraph.java | 2 +- source/net/yacy/search/index/Fulltext.java | 12 ++- source/net/yacy/search/query/SearchEvent.java | 99 +++++++++++-------- .../yacy/search/ranking/ReferenceOrder.java | 12 ++- 16 files changed, 148 insertions(+), 107 deletions(-) diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 152e8c0ac..373498531 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -549,15 +549,6 @@ public class IndexControlRWIs_p { break; } } - final Iterator iter = theSearch.misses.iterator(); // iterates url hash strings - byte[] b; - while ( iter.hasNext() ) { - b = iter.next(); - prop.put("genUrlList_urlList_" + i + "_urlExists", "0"); - prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxCount", i); - prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlhxValue", b); - i++; - } prop.put("genUrlList_urlList", i); prop.putHTML("genUrlList_keyString", keystring); prop.put("genUrlList_count", i); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 6be8297b6..d1cb1002d 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -733,12 +733,14 @@ public class yacysearch { + " - " + "local_rwi_available(" + theSearch.local_rwi_available.get() + "), " + "local_rwi_stored(" + theSearch.local_rwi_stored.get() + "), " + + "remote_rwi_available(" + theSearch.remote_rwi_available.get() + "), " + + "remote_rwi_stored(" + theSearch.remote_rwi_stored.get() + "), " + + "remote_rwi_peerCount(" + theSearch.remote_rwi_peerCount.get() + "), " + "local_solr_available(" + theSearch.local_solr_available.get() + "), " + "local_solr_stored(" + theSearch.local_solr_stored.get() + "), " - + "remote_available(" + theSearch.remote_available.get() + "), " - + "remote_stored(" + theSearch.remote_stored.get() + "), " - + "remote_peerCount(" + theSearch.remote_peerCount.get() + "), " - + "local_sortout(" + theSearch.misses.size() + "), " + + "remote_solr_available(" + theSearch.remote_solr_available.get() + "), " + + "remote_solr_stored(" + theSearch.remote_solr_stored.get() + "), " + + "remote_solr_peerCount(" + theSearch.remote_solr_peerCount.get() + "), " + (System.currentTimeMillis() - timestamp) + " ms"); @@ -827,10 +829,9 @@ public class yacysearch { prop.put("num-results_totalcount", Formatter.number(theSearch.getResultCount())); prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0"); prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); - prop.put("num-results_globalresults_localMissCount", Formatter.number(theSearch.misses.size(), true)); - prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_available.get(), true)); - prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_stored.get(), true)); - prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true)); + prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true)); + prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true)); + prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true)); // compose page navigation final StringBuilder resnav = new StringBuilder(200); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 8e4d6d312..6ce1abb0b 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -98,10 +98,9 @@ public class yacysearchitem { prop.put("itemsperpage", Formatter.number(theSearch.query.itemsPerPage)); prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true)); prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); - prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true)); - prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true)); - prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true)); - prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true)); + prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true)); + prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true)); + prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true)); prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString()); final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, ""); diff --git a/htroot/yacysearchlatestinfo.java b/htroot/yacysearchlatestinfo.java index a5b379dfb..e6d7bb0b6 100644 --- a/htroot/yacysearchlatestinfo.java +++ b/htroot/yacysearchlatestinfo.java @@ -38,10 +38,9 @@ public class yacysearchlatestinfo { prop.put("itemsperpage", theSearch.query.itemsPerPage); prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true)); prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); - prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true)); - prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true)); - prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true)); - prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true)); + prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true)); + prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true)); + prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true)); prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString()); return prop; diff --git a/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java b/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java index 62f012f08..e7200eff9 100644 --- a/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java +++ b/source/net/yacy/cora/sorting/WeakPriorityBlockingQueue.java @@ -159,15 +159,31 @@ public class WeakPriorityBlockingQueue implements Serializable { } private Element takeUnsafe() { - final Element element = this.queue.first(); + final Element element = this.queue.pollFirst(); assert element != null; - this.queue.remove(element); if (this.drained != null && (this.maxsize == -1 || this.drained.size() < this.maxsize)) this.drained.add(element); assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); return element; } - - + + /** + * remove a drained element + * @param element + */ + /* + public void removeDrained(Element element) { + if (element == null) return; + synchronized (this.drained) { + int p = this.drained.size() - 1; + if (this.drained.get(p) == element) { + this.drained.remove(p); + return; + } + } + this.drained.remove(element); + } + */ + /** * return the element with the smallest weight, but do not remove it * @return null if no element is on the queue or the head of the queue diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java b/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java index c827a64a8..12709e2dc 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java +++ b/source/net/yacy/kelondro/data/citation/CitationReferenceFactory.java @@ -36,7 +36,7 @@ public class CitationReferenceFactory implements ReferenceFactory, Se } @Override - public WordReference produceFast(final WordReference r) { + public WordReference produceFast(final WordReference r, final boolean local) { if (r instanceof WordReferenceVars) return r; - return new WordReferenceVars(r); + return new WordReferenceVars(r, local); } @Override diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index 87b7d5110..aebc82263 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -70,8 +70,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc private int virtualAge; private final Queue positions; private double termFrequency; + private final boolean local; - public WordReferenceVars(final URIMetadataRow md) { + public WordReferenceVars(final URIMetadataRow md, final boolean local) { this.language = md.language(); this.flags = md.flags(); this.lastModified = md.moddate().getTime(); @@ -93,6 +94,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.wordsintext = 1; this.wordsintitle = 1; this.termFrequency = 1; + this.local = local; } public WordReferenceVars( @@ -135,9 +137,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.wordsintext = wordcount; this.wordsintitle = titleLength; this.termFrequency = termfrequency; + this.local = true; } - public WordReferenceVars(final WordReference e) { + public WordReferenceVars(final WordReference e, boolean local) { this.flags = e.flags(); //this.freshUntil = e.freshUntil(); this.lastModified = e.lastModified(); @@ -158,6 +161,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.wordsintext = e.wordsintext(); this.wordsintitle = e.wordsintitle(); this.termFrequency = e.termFrequency(); + this.local = local; } /** @@ -182,6 +186,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.wordsintext = 0; this.wordsintitle = 0; this.termFrequency = 0.0; + this.local = true; } @Override @@ -349,6 +354,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc if (this.termFrequency == 0.0) this.termFrequency = (((double) hitcount()) / ((double) (wordsintext() + wordsintitle() + 1))); return this.termFrequency; } + + public boolean local() { + return this.local; + } public final void min(final WordReferenceVars other) { if (other == null) return; @@ -448,13 +457,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc * @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned */ - public static BlockingQueue transform(final ReferenceContainer container, final long maxtime) { + public static BlockingQueue transform(final ReferenceContainer container, final long maxtime, final boolean local) { final LinkedBlockingQueue vars = new LinkedBlockingQueue(); if (container.size() <= 100) { // transform without concurrency to omit thread creation overhead for (final Row.Entry entry: container) { try { - vars.put(new WordReferenceVars(new WordReferenceRow(entry))); + vars.put(new WordReferenceVars(new WordReferenceRow(entry), local)); } catch (final InterruptedException e) {} } try { @@ -462,7 +471,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc } catch (final InterruptedException e) {} return vars; } - final Thread distributor = new TransformDistributor(container, vars, maxtime); + final Thread distributor = new TransformDistributor(container, vars, maxtime, local); distributor.start(); // return the resulting queue while the processing queues are still working @@ -474,11 +483,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc private ReferenceContainer container; private BlockingQueue out; private long maxtime; - - private TransformDistributor(final ReferenceContainer container, final BlockingQueue out, final long maxtime) { + private final boolean local; + private TransformDistributor(final ReferenceContainer container, final BlockingQueue out, final long maxtime, final boolean local) { this.container = container; this.out = out; this.maxtime = maxtime; + this.local = local; } @Override @@ -487,7 +497,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc final int cores0 = Math.min(cores, this.container.size() / 100) + 1; final TransformWorker[] worker = new TransformWorker[cores0]; for (int i = 0; i < cores0; i++) { - worker[i] = new TransformWorker(this.out, this.maxtime); + worker[i] = new TransformWorker(this.out, this.maxtime, this.local); worker[i].start(); } long timeout = System.currentTimeMillis() + this.maxtime; @@ -525,11 +535,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc private BlockingQueue in; private BlockingQueue out; private long maxtime; + private final boolean local; - private TransformWorker(final BlockingQueue out, final long maxtime) { + private TransformWorker(final BlockingQueue out, final long maxtime, final boolean local) { this.in = new LinkedBlockingQueue(); this.out = out; this.maxtime = maxtime; + this.local = local; } private void add(final Row.Entry entry) { @@ -545,7 +557,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc long timeout = System.currentTimeMillis() + this.maxtime; try { while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) { - this.out.put(new WordReferenceVars(new WordReferenceRow(entry))); + this.out.put(new WordReferenceVars(new WordReferenceRow(entry), local)); if (System.currentTimeMillis() > timeout) { Log.logWarning("TransformWorker", "normalization of row entries from row to vars ended with timeout = " + this.maxtime); break; diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainer.java b/source/net/yacy/kelondro/rwi/ReferenceContainer.java index 690f71038..46a6fdb4b 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainer.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainer.java @@ -432,7 +432,7 @@ public class ReferenceContainer extends RowSet assert (ie2.urlhash().length == keylength) : "ie1.urlHash() = " + ASCII.String(ie2.urlhash()); // this is a hit. Calculate word distance: - ie1 = factory.produceFast(ie2); + ie1 = factory.produceFast(ie2, true); ie1.join(ie2); if (ie1.distance() <= maxDistance) conj.add(ie1); } @@ -472,7 +472,7 @@ public class ReferenceContainer extends RowSet if (e2.hasNext()) ie2 = e2.next(); else break; } else { // we have found the same urls in different searches! - ie1 = factory.produceFast(ie1); + ie1 = factory.produceFast(ie1, true); ie1.join(ie2); if (ie1.distance() <= maxDistance) conj.add(ie1); if (e1.hasNext()) ie1 = e1.next(); else break; @@ -554,7 +554,7 @@ public class ReferenceContainer extends RowSet if (e2.hasNext()) ie2 = e2.next(); else break; } else { // we have found the same urls in different searches! - ie1 = factory.produceFast(ie1); + ie1 = factory.produceFast(ie1, true); ie1.join(ie2); e1.remove(); if (e1.hasNext()) ie1 = e1.next(); else break; diff --git a/source/net/yacy/kelondro/rwi/ReferenceFactory.java b/source/net/yacy/kelondro/rwi/ReferenceFactory.java index 5b158937c..b0333bc6b 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceFactory.java +++ b/source/net/yacy/kelondro/rwi/ReferenceFactory.java @@ -34,6 +34,6 @@ public interface ReferenceFactory { public ReferenceType produceSlow(Row.Entry e); - public ReferenceType produceFast(ReferenceType e); + public ReferenceType produceFast(ReferenceType e, final boolean local); } diff --git a/source/net/yacy/peers/graphics/WebStructureGraph.java b/source/net/yacy/peers/graphics/WebStructureGraph.java index 5e0cbd555..c61f8bdeb 100644 --- a/source/net/yacy/peers/graphics/WebStructureGraph.java +++ b/source/net/yacy/peers/graphics/WebStructureGraph.java @@ -377,7 +377,7 @@ public class WebStructureGraph { } @Override - public HostReference produceFast(final HostReference e) { + public HostReference produceFast(final HostReference e, final boolean local) { return e; } diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index fa0538568..bb36133dc 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -54,6 +54,7 @@ import net.yacy.cora.federate.solr.instance.ShardInstance; import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ScoreMap; +import net.yacy.cora.sorting.WeakPriorityBlockingQueue; import net.yacy.cora.storage.ZIPReader; import net.yacy.cora.storage.ZIPWriter; import net.yacy.document.parser.html.CharacterCoding; @@ -61,6 +62,7 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.index.Cache; import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Row; @@ -315,9 +317,13 @@ public final class Fulltext { } } - public URIMetadataNode getMetadata(WordReference wre, long weight) { + public URIMetadataNode getMetadata(WeakPriorityBlockingQueue.Element element) { + if (element == null) return null; + WordReferenceVars wre = element.getElement(); + long weight = element.getWeight(); if (wre == null) return null; // all time was already wasted in takeRWI to get another element - return getMetadata(wre.urlhash(), wre, weight); + URIMetadataNode node = getMetadata(wre.urlhash(), wre, weight); + return node; } public URIMetadataNode getMetadata(final byte[] urlHash) { @@ -325,7 +331,7 @@ public final class Fulltext { return getMetadata(urlHash, null, 0); } - private URIMetadataNode getMetadata(final byte[] urlHash, WordReference wre, long weight) { + private URIMetadataNode getMetadata(final byte[] urlHash, WordReferenceVars wre, long weight) { // get the metadata from Solr try { diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 8ca053a86..7e83995d0 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -28,16 +28,13 @@ package net.yacy.search.query; import java.text.ParseException; import java.util.ArrayList; -import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.SortedMap; -import java.util.SortedSet; import java.util.TreeMap; -import java.util.TreeSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; @@ -98,7 +95,7 @@ import net.yacy.search.snippet.TextSnippet.ResultClass; public final class SearchEvent { - private static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit' + private static final int max_results_rwi = 3000; private static long noRobinsonLocalRWISearch = 0; static { @@ -160,15 +157,17 @@ public final class SearchEvent { // the following values are filled during the search process as statistics for the search public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index public final AtomicInteger local_rwi_stored; // the number of existing hits by the local search in rwi index + public final AtomicInteger remote_rwi_available; // the number of hits imported from remote peers (rwi/solr mixed) + public final AtomicInteger remote_rwi_stored; // the number of existing hits at remote site + public final AtomicInteger remote_rwi_peerCount; // the number of peers which contributed to the remote search result public final AtomicInteger local_solr_available; // the number of hits generated/ranked by the local search in solr public final AtomicInteger local_solr_stored; // the number of existing hits by the local search in solr - public final AtomicInteger remote_available; // the number of hits imported from remote peers (rwi/solr mixed) - public final AtomicInteger remote_stored; // the number of existing hits at remote site - public final AtomicInteger remote_peerCount; // the number of peers which contributed to the remote search result - public final SortedSet misses; // url hashes that had been sorted out because of constraints in postranking - + public final AtomicInteger remote_solr_available;// the number of hits imported from remote peers (rwi/solr mixed) + public final AtomicInteger remote_solr_stored; // the number of existing hits at remote site + public final AtomicInteger remote_solr_peerCount;// the number of peers which contributed to the remote search result + public int getResultCount() { - return this.rwiStack.sizeQueue() + this.nodeStack.sizeQueue() + this.resultList.sizeAvailable(); + return this.local_rwi_available.get() + local_solr_stored.get(); } protected SearchEvent( @@ -214,14 +213,16 @@ public final class SearchEvent { this.IAneardhthash = null; this.localSearchThread = null; this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && peers.mySeed().getFlagAcceptRemoteIndex())); - this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering - this.local_rwi_stored = new AtomicInteger(0); - this.local_solr_available= new AtomicInteger(0); - this.local_solr_stored = new AtomicInteger(0); - this.remote_stored = new AtomicInteger(0); - this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers - this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed - this.misses = Collections.synchronizedSortedSet(new TreeSet(URIMetadataRow.rowdef.objectOrder)); + this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering + this.local_rwi_stored = new AtomicInteger(0); + this.local_solr_available = new AtomicInteger(0); + this.local_solr_stored = new AtomicInteger(0); + this.remote_rwi_stored = new AtomicInteger(0); + this.remote_rwi_available = new AtomicInteger(0); // the number of result contributions from all the remote dht peers + this.remote_rwi_peerCount = new AtomicInteger(0); // the number of remote dht peers that have contributed + this.remote_solr_stored = new AtomicInteger(0); + this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers + this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed final long start = System.currentTimeMillis(); // do a soft commit for fresh results @@ -233,8 +234,7 @@ public final class SearchEvent { this.localSearchInclusion = null; this.ref = new ConcurrentScoreMap(); this.maxtime = query.maxtime; - int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation; - this.rwiStack = new WeakPriorityBlockingQueue(stackMaxsize, false); + this.rwiStack = new WeakPriorityBlockingQueue(max_results_rwi, false); this.doubleDomCache = new ConcurrentHashMap>(); this.flagcount = new int[32]; for ( int i = 0; i < 32; i++ ) { @@ -445,13 +445,13 @@ public final class SearchEvent { this.local_rwi_stored.addAndGet(fullResource); } else { assert fullResource >= 0 : "fullResource = " + fullResource; - this.remote_stored.addAndGet(fullResource); - this.remote_peerCount.incrementAndGet(); + this.remote_rwi_stored.addAndGet(fullResource); + this.remote_rwi_peerCount.incrementAndGet(); } long timer = System.currentTimeMillis(); // normalize entries - final BlockingQueue decodedEntries = this.order.normalizeWith(index, maxtime); + final BlockingQueue decodedEntries = this.order.normalizeWith(index, maxtime, local); int is = index.size(); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch( this.query.id(true), @@ -530,7 +530,7 @@ public final class SearchEvent { } } // increase counter for statistics - if (local) this.local_rwi_available.incrementAndGet(); else this.remote_available.incrementAndGet(); + if (local) this.local_rwi_available.incrementAndGet(); else this.remote_rwi_available.incrementAndGet(); } if (System.currentTimeMillis() >= timeout) Log.logWarning("SearchEvent", "rwi normalization ended with timeout = " + maxtime); @@ -650,8 +650,8 @@ public final class SearchEvent { this.local_solr_stored.set(fullResource); } else { assert fullResource >= 0 : "fullResource = " + fullResource; - this.remote_stored.addAndGet(fullResource); - this.remote_peerCount.incrementAndGet(); + this.remote_solr_stored.addAndGet(fullResource); + this.remote_solr_peerCount.incrementAndGet(); } long timer = System.currentTimeMillis(); @@ -785,7 +785,7 @@ public final class SearchEvent { } } // increase counter for statistics - if (local) this.local_solr_available.incrementAndGet(); else this.remote_available.incrementAndGet(); + if (local) this.local_solr_available.incrementAndGet(); else this.remote_solr_available.incrementAndGet(); } } catch ( final SpaceExceededException e ) { } @@ -819,7 +819,7 @@ public final class SearchEvent { rwi = this.rwiStack.poll(); if (rwi == null) return null; if (!skipDoubleDom) { - URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight()); + URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi); if (node == null) continue pollloop; return node; } @@ -832,9 +832,9 @@ public final class SearchEvent { m = this.doubleDomCache.get(hosthash); if (m == null) { // first appearance of dom. we create an entry to signal that one of that domain was already returned - m = new WeakPriorityBlockingQueue(this.query.snippetCacheStrategy == null || this.query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation, false); + m = new WeakPriorityBlockingQueue(max_results_rwi, false); this.doubleDomCache.put(hosthash, m); - URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight()); + URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi); if (node == null) continue pollloop; return node; } @@ -894,8 +894,12 @@ public final class SearchEvent { //Log.logWarning("SearchEvent", "bestEntry == null (2)"); return null; } - URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry.getElement(), bestEntry.getWeight()); - if (node == null) continue mainloop; + URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry); + if (node == null) { + if (bestEntry.getElement().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); + if (log.isFine()) log.logFine("dropped RWI: hash not in metadata"); + continue mainloop; + } return node; } } @@ -916,14 +920,15 @@ public final class SearchEvent { while ((page = pullOneRWI(skipDoubleDom)) != null) { if (!this.query.urlMask_isCatchall && !page.matches(this.query.urlMask)) { - // check url mask - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: no match with urlMask"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } // check for more errors if (page.url() == null) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: url == null"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; // rare case where the url is corrupted } @@ -933,13 +938,15 @@ public final class SearchEvent { (this.query.contentdom == Classification.ContentDomain.AUDIO && page.url().getContentDomain() != Classification.ContentDomain.AUDIO) || (this.query.contentdom == Classification.ContentDomain.VIDEO && page.url().getContentDomain() != Classification.ContentDomain.VIDEO) || (this.query.contentdom == Classification.ContentDomain.APP && page.url().getContentDomain() != Classification.ContentDomain.APP)) && this.query.urlMask_isCatchall) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: wrong contentdom = " + this.query.contentdom + ", domain = " + page.url().getContentDomain()); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } // Check for blacklist if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page)) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in url blacklist"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } @@ -947,7 +954,8 @@ public final class SearchEvent { if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) { FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); if (f != null && !f.isListed(page.url(), null)) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in contentcontrol"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } } @@ -961,7 +969,8 @@ public final class SearchEvent { ((QueryParams.anymatch(pagetitle, this.query.getQueryGoal().getExcludeHashes())) || (QueryParams.anymatch(pageurl.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())) || (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())))) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: no match with query goal exclusion"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } @@ -971,13 +980,15 @@ public final class SearchEvent { while (wi.hasNext()) { this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash()); } - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: url does not match index-of constraint"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } // check location constraint if ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) && (page.lat() == 0.0 || page.lon() == 0.0)) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: location constraint"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } @@ -988,14 +999,16 @@ public final class SearchEvent { double lonDelta = this.query.lon - lon; double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras if (distance > this.query.radius) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: radius constraint"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } } // check Scanner if (this.query.filterscannerfail && !Scanner.acceptURL(page.url())) { - this.misses.add(page.hash()); + if (log.isFine()) log.logFine("dropped RWI: url not accepted by scanner"); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); continue; } diff --git a/source/net/yacy/search/ranking/ReferenceOrder.java b/source/net/yacy/search/ranking/ReferenceOrder.java index 94238cda3..c92dee88c 100644 --- a/source/net/yacy/search/ranking/ReferenceOrder.java +++ b/source/net/yacy/search/ranking/ReferenceOrder.java @@ -66,11 +66,11 @@ public class ReferenceOrder { this.language = language; } - public BlockingQueue normalizeWith(final ReferenceContainer container, long maxtime) { + public BlockingQueue normalizeWith(final ReferenceContainer container, long maxtime, final boolean local) { final LinkedBlockingQueue out = new LinkedBlockingQueue(); int threads = cores; if (container.size() < 100) threads = 2; - final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime); + final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime, local); distributor.start(); // return the resulting queue while the processing queues are still working @@ -83,18 +83,20 @@ public class ReferenceOrder { LinkedBlockingQueue out; private final int threads; private final long maxtime; - - public NormalizeDistributor(final ReferenceContainer container, final LinkedBlockingQueue out, final int threads, final long maxtime) { + private final boolean local; + + public NormalizeDistributor(final ReferenceContainer container, final LinkedBlockingQueue out, final int threads, final long maxtime, final boolean local) { this.container = container; this.out = out; this.threads = threads; this.maxtime = maxtime; + this.local = local; } @Override public void run() { // transform the reference container into a stream of parsed entries - final BlockingQueue vars = WordReferenceVars.transform(this.container, this.maxtime); + final BlockingQueue vars = WordReferenceVars.transform(this.container, this.maxtime, this.local); // start the transformation threads final Semaphore termination = new Semaphore(this.threads);