corrected result counter

pull/1/head
orbiter 12 years ago
parent 2555542f7a
commit d74472f562

@@ -549,15 +549,6 @@ public class IndexControlRWIs_p {
break;
}
}
final Iterator<byte[]> iter = theSearch.misses.iterator(); // iterates url hash strings
byte[] b;
while ( iter.hasNext() ) {
b = iter.next();
prop.put("genUrlList_urlList_" + i + "_urlExists", "0");
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxCount", i);
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlhxValue", b);
i++;
}
prop.put("genUrlList_urlList", i);
prop.putHTML("genUrlList_keyString", keystring);
prop.put("genUrlList_count", i);

@@ -733,12 +733,14 @@ public class yacysearch {
+ " - "
+ "local_rwi_available(" + theSearch.local_rwi_available.get() + "), "
+ "local_rwi_stored(" + theSearch.local_rwi_stored.get() + "), "
+ "remote_rwi_available(" + theSearch.remote_rwi_available.get() + "), "
+ "remote_rwi_stored(" + theSearch.remote_rwi_stored.get() + "), "
+ "remote_rwi_peerCount(" + theSearch.remote_rwi_peerCount.get() + "), "
+ "local_solr_available(" + theSearch.local_solr_available.get() + "), "
+ "local_solr_stored(" + theSearch.local_solr_stored.get() + "), "
+ "remote_available(" + theSearch.remote_available.get() + "), "
+ "remote_stored(" + theSearch.remote_stored.get() + "), "
+ "remote_peerCount(" + theSearch.remote_peerCount.get() + "), "
+ "local_sortout(" + theSearch.misses.size() + "), "
+ "remote_solr_available(" + theSearch.remote_solr_available.get() + "), "
+ "remote_solr_stored(" + theSearch.remote_solr_stored.get() + "), "
+ "remote_solr_peerCount(" + theSearch.remote_solr_peerCount.get() + "), "
+ (System.currentTimeMillis() - timestamp)
+ " ms");
@@ -827,10 +829,9 @@ public class yacysearch {
prop.put("num-results_totalcount", Formatter.number(theSearch.getResultCount()));
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0");
prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("num-results_globalresults_localMissCount", Formatter.number(theSearch.misses.size(), true));
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_available.get(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_stored.get(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
// compose page navigation
final StringBuilder resnav = new StringBuilder(200);

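In the hunk above, the former combined remote counters are replaced by sums of the split RWI and Solr counters when the num-results template values are written. A minimal JDK-only sketch of that aggregation, with illustrative field names rather than the actual YaCy SearchEvent fields:

import java.util.concurrent.atomic.AtomicInteger;

public class RemoteTotalsSketch {
    // split counters, one pair per transport (RWI over DHT, remote Solr)
    final AtomicInteger remoteRwiAvailable  = new AtomicInteger();
    final AtomicInteger remoteSolrAvailable = new AtomicInteger();
    final AtomicInteger remoteRwiStored     = new AtomicInteger();
    final AtomicInteger remoteSolrStored    = new AtomicInteger();
    final AtomicInteger remoteRwiPeers      = new AtomicInteger();
    final AtomicInteger remoteSolrPeers     = new AtomicInteger();

    // combined values, as written into the servlet properties above
    int remoteResourceSize() { return remoteRwiAvailable.get() + remoteSolrAvailable.get(); }
    int remoteIndexCount()   { return remoteRwiStored.get()    + remoteSolrStored.get(); }
    int remotePeerCount()    { return remoteRwiPeers.get()     + remoteSolrPeers.get(); }

    public static void main(String[] args) {
        RemoteTotalsSketch s = new RemoteTotalsSketch();
        s.remoteRwiAvailable.set(10);
        s.remoteSolrAvailable.set(5);
        System.out.println(s.remoteResourceSize()); // prints 15
    }
}
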
@@ -98,10 +98,9 @@ public class yacysearchitem {
prop.put("itemsperpage", Formatter.number(theSearch.query.itemsPerPage));
prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");

@@ -38,10 +38,9 @@ public class yacysearchlatestinfo {
prop.put("itemsperpage", theSearch.query.itemsPerPage);
prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
return prop;

@@ -159,15 +159,31 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
}
private Element<E> takeUnsafe() {
final Element<E> element = this.queue.first();
final Element<E> element = this.queue.pollFirst();
assert element != null;
this.queue.remove(element);
if (this.drained != null && (this.maxsize == -1 || this.drained.size() < this.maxsize)) this.drained.add(element);
assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
return element;
}
/**
* remove a drained element
* @param element
*/
/*
public void removeDrained(Element<E> element) {
if (element == null) return;
synchronized (this.drained) {
int p = this.drained.size() - 1;
if (this.drained.get(p) == element) {
this.drained.remove(p);
return;
}
}
this.drained.remove(element);
}
*/
/**
* return the element with the smallest weight, but do not remove it
* @return null if no element is on the queue or the head of the queue

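takeUnsafe() above replaces first() followed by remove() with a single pollFirst() call, which retrieves and removes the head of the backing sorted set in one step. A minimal JDK-only sketch of the two forms (a TreeSet stands in for the queue's internal structure, not the YaCy class):

import java.util.NavigableSet;
import java.util.TreeSet;

public class PollFirstSketch {
    public static void main(String[] args) {
        NavigableSet<Integer> queue = new TreeSet<Integer>();
        queue.add(3); queue.add(1); queue.add(2);

        // old form: read the smallest element, then remove it in a second call
        Integer head = queue.first();
        queue.remove(head);

        // new form: retrieve and remove the smallest element in one call
        Integer next = queue.pollFirst();

        System.out.println(head + " " + next); // prints 1 2
    }
}
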
@@ -36,7 +36,7 @@ public class CitationReferenceFactory implements ReferenceFactory<CitationRefere
}
@Override
public CitationReference produceFast(final CitationReference r) {
public CitationReference produceFast(final CitationReference r, final boolean local) {
throw new UnsupportedOperationException();
}

@@ -40,7 +40,7 @@ import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.order.Base64Order;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.search.schema.CollectionSchema;
@@ -77,8 +77,8 @@ public class URIMetadataNode {
private long ranking = -1; // during generation of a search result this value is set
private SolrDocument doc = null;
private String snippet = null;
private WordReference word = null; // this is only used if the url is transported via remote search requests
private WordReferenceVars word = null; // this is only used if the url is transported via remote search requests
public URIMetadataNode(final SolrInputDocument doc) {
this(ClientUtils.toSolrDocument(doc));
}
@@ -98,18 +98,18 @@ public class URIMetadataNode {
}
}
public URIMetadataNode(final SolrInputDocument doc, final WordReference searchedWord, final long ranking) {
public URIMetadataNode(final SolrInputDocument doc, final WordReferenceVars searchedWord, final long ranking) {
this(ClientUtils.toSolrDocument(doc));
this.word = searchedWord;
this.ranking = ranking;
}
public URIMetadataNode(final SolrDocument doc, final WordReference searchedWord, final long ranking) {
public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final long ranking) {
this(doc);
this.word = searchedWord;
this.ranking = ranking;
}
public SolrDocument getDocument() {
return this.doc;
}
@@ -292,7 +292,7 @@ public class URIMetadataNode {
return a.toArray(new String[a.size()]);
}
public WordReference word() {
public WordReferenceVars word() {
return this.word;
}
@@ -429,6 +429,7 @@ public class URIMetadataNode {
return core.toString();
}
/*
private DigestURI getURL(CollectionSchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
@@ -440,7 +441,8 @@ public class URIMetadataNode {
return null;
}
}
*/
private int getInt(CollectionSchema field) {
assert !field.isMultiValued();
assert field.getType() == SolrType.num_integer;

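The URIMetadataNode hunks above narrow the stored word reference and its word() accessor from the WordReference interface to the concrete WordReferenceVars, which is what later lets result filtering ask page.word().local() without a cast. A minimal standalone sketch of that idea, with illustrative types rather than the YaCy classes:

interface Ref {
    long lastModified();
}

final class RefVars implements Ref {
    public long lastModified() { return 0L; }
    public boolean local() { return true; } // extra information only the concrete type carries
}

public class NarrowedFieldSketch {
    private RefVars word = new RefVars(); // was: private Ref word

    public RefVars word() { return this.word; }

    public static void main(String[] args) {
        NarrowedFieldSketch node = new NarrowedFieldSketch();
        System.out.println(node.word().local()); // no instanceof or cast needed at the call site
    }
}
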
@@ -173,7 +173,7 @@ public class URIMetadataRow {
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
this.word = null;
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
}
this.comp = null;
}

@@ -52,9 +52,9 @@ public class WordReferenceFactory implements ReferenceFactory<WordReference>, Se
}
@Override
public WordReference produceFast(final WordReference r) {
public WordReference produceFast(final WordReference r, final boolean local) {
if (r instanceof WordReferenceVars) return r;
return new WordReferenceVars(r);
return new WordReferenceVars(r, local);
}
@Override

@@ -70,8 +70,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private int virtualAge;
private final Queue<Integer> positions;
private double termFrequency;
private final boolean local;
public WordReferenceVars(final URIMetadataRow md) {
public WordReferenceVars(final URIMetadataRow md, final boolean local) {
this.language = md.language();
this.flags = md.flags();
this.lastModified = md.moddate().getTime();
@@ -93,6 +94,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = 1;
this.wordsintitle = 1;
this.termFrequency = 1;
this.local = local;
}
public WordReferenceVars(
@@ -135,9 +137,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = wordcount;
this.wordsintitle = titleLength;
this.termFrequency = termfrequency;
this.local = true;
}
public WordReferenceVars(final WordReference e) {
public WordReferenceVars(final WordReference e, boolean local) {
this.flags = e.flags();
//this.freshUntil = e.freshUntil();
this.lastModified = e.lastModified();
@@ -158,6 +161,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = e.wordsintext();
this.wordsintitle = e.wordsintitle();
this.termFrequency = e.termFrequency();
this.local = local;
}
/**
@@ -182,6 +186,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = 0;
this.wordsintitle = 0;
this.termFrequency = 0.0;
this.local = true;
}
@Override
@@ -349,6 +354,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.termFrequency == 0.0) this.termFrequency = (((double) hitcount()) / ((double) (wordsintext() + wordsintitle() + 1)));
return this.termFrequency;
}
public boolean local() {
return this.local;
}
public final void min(final WordReferenceVars other) {
if (other == null) return;
@@ -448,13 +457,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
* @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned
*/
public static BlockingQueue<WordReferenceVars> transform(final ReferenceContainer<WordReference> container, final long maxtime) {
public static BlockingQueue<WordReferenceVars> transform(final ReferenceContainer<WordReference> container, final long maxtime, final boolean local) {
final LinkedBlockingQueue<WordReferenceVars> vars = new LinkedBlockingQueue<WordReferenceVars>();
if (container.size() <= 100) {
// transform without concurrency to omit thread creation overhead
for (final Row.Entry entry: container) {
try {
vars.put(new WordReferenceVars(new WordReferenceRow(entry)));
vars.put(new WordReferenceVars(new WordReferenceRow(entry), local));
} catch (final InterruptedException e) {}
}
try {
@@ -462,7 +471,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
} catch (final InterruptedException e) {}
return vars;
}
final Thread distributor = new TransformDistributor(container, vars, maxtime);
final Thread distributor = new TransformDistributor(container, vars, maxtime, local);
distributor.start();
// return the resulting queue while the processing queues are still working
@@ -474,11 +483,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private ReferenceContainer<WordReference> container;
private BlockingQueue<WordReferenceVars> out;
private long maxtime;
private TransformDistributor(final ReferenceContainer<WordReference> container, final BlockingQueue<WordReferenceVars> out, final long maxtime) {
private final boolean local;
private TransformDistributor(final ReferenceContainer<WordReference> container, final BlockingQueue<WordReferenceVars> out, final long maxtime, final boolean local) {
this.container = container;
this.out = out;
this.maxtime = maxtime;
this.local = local;
}
@Override
@@ -487,7 +497,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int cores0 = Math.min(cores, this.container.size() / 100) + 1;
final TransformWorker[] worker = new TransformWorker[cores0];
for (int i = 0; i < cores0; i++) {
worker[i] = new TransformWorker(this.out, this.maxtime);
worker[i] = new TransformWorker(this.out, this.maxtime, this.local);
worker[i].start();
}
long timeout = System.currentTimeMillis() + this.maxtime;
@@ -525,11 +535,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private BlockingQueue<Row.Entry> in;
private BlockingQueue<WordReferenceVars> out;
private long maxtime;
private final boolean local;
private TransformWorker(final BlockingQueue<WordReferenceVars> out, final long maxtime) {
private TransformWorker(final BlockingQueue<WordReferenceVars> out, final long maxtime, final boolean local) {
this.in = new LinkedBlockingQueue<Row.Entry>();
this.out = out;
this.maxtime = maxtime;
this.local = local;
}
private void add(final Row.Entry entry) {
@@ -545,7 +557,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
long timeout = System.currentTimeMillis() + this.maxtime;
try {
while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) {
this.out.put(new WordReferenceVars(new WordReferenceRow(entry)));
this.out.put(new WordReferenceVars(new WordReferenceRow(entry), local));
if (System.currentTimeMillis() > timeout) {
Log.logWarning("TransformWorker", "normalization of row entries from row to vars ended with timeout = " + this.maxtime);
break;

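In the hunks above, the local flag travels from transform() through TransformDistributor into every TransformWorker, so each WordReferenceVars produced from a row entry records whether it originated from the local index. A minimal JDK-only sketch of that flag-threading pattern, with illustrative names rather than the YaCy classes:

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class FlagThreadingSketch {

    // a produced value that remembers where it came from
    static final class Ref {
        final String value;
        final boolean local;
        Ref(String value, boolean local) { this.value = value; this.local = local; }
        @Override public String toString() { return value + (local ? "/local" : "/remote"); }
    }

    static final String POISON = "\u0000"; // end-of-input marker, like the poison row entry

    // counterpart of TransformWorker: the flag is handed in once and attached to every result
    static final class Worker extends Thread {
        private final BlockingQueue<String> in;
        private final BlockingQueue<Ref> out;
        private final boolean local;
        Worker(BlockingQueue<String> in, BlockingQueue<Ref> out, boolean local) {
            this.in = in; this.out = out; this.local = local;
        }
        @Override public void run() {
            try {
                String entry;
                while (!(entry = this.in.take()).equals(POISON)) {
                    this.out.put(new Ref(entry, this.local));
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }

    public static void main(String[] args) throws InterruptedException {
        BlockingQueue<String> in = new LinkedBlockingQueue<String>();
        BlockingQueue<Ref> out = new LinkedBlockingQueue<Ref>();
        Worker worker = new Worker(in, out, false); // false: entries come from a remote source
        worker.start();
        in.put("a"); in.put("b"); in.put(POISON);
        worker.join();
        System.out.println(out); // prints [a/remote, b/remote]
    }
}
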
@@ -432,7 +432,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
assert (ie2.urlhash().length == keylength) : "ie1.urlHash() = " + ASCII.String(ie2.urlhash());
// this is a hit. Calculate word distance:
ie1 = factory.produceFast(ie2);
ie1 = factory.produceFast(ie2, true);
ie1.join(ie2);
if (ie1.distance() <= maxDistance) conj.add(ie1);
}
@@ -472,7 +472,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
if (e2.hasNext()) ie2 = e2.next(); else break;
} else {
// we have found the same urls in different searches!
ie1 = factory.produceFast(ie1);
ie1 = factory.produceFast(ie1, true);
ie1.join(ie2);
if (ie1.distance() <= maxDistance) conj.add(ie1);
if (e1.hasNext()) ie1 = e1.next(); else break;
@@ -554,7 +554,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
if (e2.hasNext()) ie2 = e2.next(); else break;
} else {
// we have found the same urls in different searches!
ie1 = factory.produceFast(ie1);
ie1 = factory.produceFast(ie1, true);
ie1.join(ie2);
e1.remove();
if (e1.hasNext()) ie1 = e1.next(); else break;

@@ -34,6 +34,6 @@ public interface ReferenceFactory<ReferenceType extends Reference> {
public ReferenceType produceSlow(Row.Entry e);
public ReferenceType produceFast(ReferenceType e);
public ReferenceType produceFast(ReferenceType e, final boolean local);
}

@@ -377,7 +377,7 @@ public class WebStructureGraph {
}
@Override
public HostReference produceFast(final HostReference e) {
public HostReference produceFast(final HostReference e, final boolean local) {
return e;
}

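The ReferenceFactory interface above gains a boolean local parameter on produceFast, and each implementation decides what to do with it: WordReferenceFactory stores it in the produced WordReferenceVars, while WebStructureGraph's host reference factory accepts and ignores it. A minimal standalone sketch of that widened contract, with illustrative types rather than the YaCy interface:

interface MiniFactory<T> {
    T produceFast(T e, boolean local);
}

final class TaggedRef {
    final String id;
    final boolean local;
    TaggedRef(String id, boolean local) { this.id = id; this.local = local; }
}

public class FactorySketch {
    public static void main(String[] args) {
        // counterpart of WordReferenceFactory: the flag ends up in the produced object
        MiniFactory<TaggedRef> tagging = (e, local) -> new TaggedRef(e.id, local);
        // counterpart of the host reference factory: the flag is accepted but ignored
        MiniFactory<TaggedRef> passThrough = (e, local) -> e;

        TaggedRef r = tagging.produceFast(new TaggedRef("x", false), true);
        System.out.println(r.local + " " + (passThrough.produceFast(r, false) == r)); // prints true true
    }
}
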
@@ -54,6 +54,7 @@ import net.yacy.cora.federate.solr.instance.ShardInstance;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.ZIPReader;
import net.yacy.cora.storage.ZIPWriter;
import net.yacy.document.parser.html.CharacterCoding;
@@ -61,6 +62,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
@@ -315,9 +317,13 @@ public final class Fulltext {
}
}
public URIMetadataNode getMetadata(WordReference wre, long weight) {
public URIMetadataNode getMetadata(WeakPriorityBlockingQueue.Element<WordReferenceVars> element) {
if (element == null) return null;
WordReferenceVars wre = element.getElement();
long weight = element.getWeight();
if (wre == null) return null; // all time was already wasted in takeRWI to get another element
return getMetadata(wre.urlhash(), wre, weight);
URIMetadataNode node = getMetadata(wre.urlhash(), wre, weight);
return node;
}
public URIMetadataNode getMetadata(final byte[] urlHash) {
@@ -325,7 +331,7 @@ public final class Fulltext {
return getMetadata(urlHash, null, 0);
}
private URIMetadataNode getMetadata(final byte[] urlHash, WordReference wre, long weight) {
private URIMetadataNode getMetadata(final byte[] urlHash, WordReferenceVars wre, long weight) {
// get the metadata from Solr
try {

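Fulltext.getMetadata above now accepts the WeakPriorityBlockingQueue element itself and unpacks reference and weight inside, so callers cannot hand over a mismatched pair. A minimal sketch of that refactor shape, with illustrative types rather than the YaCy classes:

final class QueueElement<E> {
    private final E element;
    private final long weight;
    QueueElement(E element, long weight) { this.element = element; this.weight = weight; }
    E getElement() { return this.element; }
    long getWeight() { return this.weight; }
}

public class GetMetadataSketch {
    // was: getMetadata(E reference, long weight) with two separate arguments
    static String getMetadata(QueueElement<String> element) {
        if (element == null) return null;
        String reference = element.getElement();
        long weight = element.getWeight();
        if (reference == null) return null;
        return reference + "@" + weight;
    }

    public static void main(String[] args) {
        System.out.println(getMetadata(new QueueElement<String>("abc", 42))); // prints abc@42
    }
}
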
@@ -28,16 +28,13 @@ package net.yacy.search.query;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
@@ -98,7 +95,7 @@ import net.yacy.search.snippet.TextSnippet.ResultClass;
public final class SearchEvent {
private static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit'
private static final int max_results_rwi = 3000;
private static long noRobinsonLocalRWISearch = 0;
static {
@@ -160,15 +157,17 @@ public final class SearchEvent {
// the following values are filled during the search process as statistics for the search
public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index
public final AtomicInteger local_rwi_stored; // the number of existing hits by the local search in rwi index
public final AtomicInteger remote_rwi_available; // the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_rwi_stored; // the number of existing hits at remote site
public final AtomicInteger remote_rwi_peerCount; // the number of peers which contributed to the remote search result
public final AtomicInteger local_solr_available; // the number of hits generated/ranked by the local search in solr
public final AtomicInteger local_solr_stored; // the number of existing hits by the local search in solr
public final AtomicInteger remote_available; // the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_stored; // the number of existing hits at remote site
public final AtomicInteger remote_peerCount; // the number of peers which contributed to the remote search result
public final SortedSet<byte[]> misses; // url hashes that had been sorted out because of constraints in postranking
public final AtomicInteger remote_solr_available;// the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_solr_stored; // the number of existing hits at remote site
public final AtomicInteger remote_solr_peerCount;// the number of peers which contributed to the remote search result
public int getResultCount() {
return this.rwiStack.sizeQueue() + this.nodeStack.sizeQueue() + this.resultList.sizeAvailable();
return this.local_rwi_available.get() + local_solr_stored.get();
}
protected SearchEvent(
@@ -214,14 +213,16 @@ public final class SearchEvent {
this.IAneardhthash = null;
this.localSearchThread = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && peers.mySeed().getFlagAcceptRemoteIndex()));
this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering
this.local_rwi_stored = new AtomicInteger(0);
this.local_solr_available= new AtomicInteger(0);
this.local_solr_stored = new AtomicInteger(0);
this.remote_stored = new AtomicInteger(0);
this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers
this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering
this.local_rwi_stored = new AtomicInteger(0);
this.local_solr_available = new AtomicInteger(0);
this.local_solr_stored = new AtomicInteger(0);
this.remote_rwi_stored = new AtomicInteger(0);
this.remote_rwi_available = new AtomicInteger(0); // the number of result contributions from all the remote dht peers
this.remote_rwi_peerCount = new AtomicInteger(0); // the number of remote dht peers that have contributed
this.remote_solr_stored = new AtomicInteger(0);
this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers
this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed
final long start = System.currentTimeMillis();
// do a soft commit for fresh results
@@ -233,8 +234,7 @@ public final class SearchEvent {
this.localSearchInclusion = null;
this.ref = new ConcurrentScoreMap<String>();
this.maxtime = query.maxtime;
int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation;
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.flagcount = new int[32];
for ( int i = 0; i < 32; i++ ) {
@@ -445,13 +445,13 @@ public final class SearchEvent {
this.local_rwi_stored.addAndGet(fullResource);
} else {
assert fullResource >= 0 : "fullResource = " + fullResource;
this.remote_stored.addAndGet(fullResource);
this.remote_peerCount.incrementAndGet();
this.remote_rwi_stored.addAndGet(fullResource);
this.remote_rwi_peerCount.incrementAndGet();
}
long timer = System.currentTimeMillis();
// normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime);
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
int is = index.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
this.query.id(true),
@@ -530,7 +530,7 @@ public final class SearchEvent {
}
}
// increase counter for statistics
if (local) this.local_rwi_available.incrementAndGet(); else this.remote_available.incrementAndGet();
if (local) this.local_rwi_available.incrementAndGet(); else this.remote_rwi_available.incrementAndGet();
}
if (System.currentTimeMillis() >= timeout) Log.logWarning("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
@@ -650,8 +650,8 @@ public final class SearchEvent {
this.local_solr_stored.set(fullResource);
} else {
assert fullResource >= 0 : "fullResource = " + fullResource;
this.remote_stored.addAndGet(fullResource);
this.remote_peerCount.incrementAndGet();
this.remote_solr_stored.addAndGet(fullResource);
this.remote_solr_peerCount.incrementAndGet();
}
long timer = System.currentTimeMillis();
@@ -785,7 +785,7 @@ public final class SearchEvent {
}
}
// increase counter for statistics
if (local) this.local_solr_available.incrementAndGet(); else this.remote_available.incrementAndGet();
if (local) this.local_solr_available.incrementAndGet(); else this.remote_solr_available.incrementAndGet();
}
} catch ( final SpaceExceededException e ) {
}
@@ -819,7 +819,7 @@ public final class SearchEvent {
rwi = this.rwiStack.poll();
if (rwi == null) return null;
if (!skipDoubleDom) {
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight());
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) continue pollloop;
return node;
}
@@ -832,9 +832,9 @@ public final class SearchEvent {
m = this.doubleDomCache.get(hosthash);
if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>(this.query.snippetCacheStrategy == null || this.query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation, false);
m = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
this.doubleDomCache.put(hosthash, m);
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight());
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) continue pollloop;
return node;
}
@@ -894,8 +894,12 @@ public final class SearchEvent {
//Log.logWarning("SearchEvent", "bestEntry == null (2)");
return null;
}
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry.getElement(), bestEntry.getWeight());
if (node == null) continue mainloop;
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry);
if (node == null) {
if (bestEntry.getElement().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
if (log.isFine()) log.logFine("dropped RWI: hash not in metadata");
continue mainloop;
}
return node;
}
}
@@ -916,14 +920,15 @@ public final class SearchEvent {
while ((page = pullOneRWI(skipDoubleDom)) != null) {
if (!this.query.urlMask_isCatchall && !page.matches(this.query.urlMask)) {
// check url mask
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: no match with urlMask");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
// check for more errors
if (page.url() == null) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: url == null");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; // rare case where the url is corrupted
}
@@ -933,13 +938,15 @@ public final class SearchEvent {
(this.query.contentdom == Classification.ContentDomain.AUDIO && page.url().getContentDomain() != Classification.ContentDomain.AUDIO) ||
(this.query.contentdom == Classification.ContentDomain.VIDEO && page.url().getContentDomain() != Classification.ContentDomain.VIDEO) ||
(this.query.contentdom == Classification.ContentDomain.APP && page.url().getContentDomain() != Classification.ContentDomain.APP)) && this.query.urlMask_isCatchall) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: wrong contentdom = " + this.query.contentdom + ", domain = " + page.url().getContentDomain());
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
// Check for blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page)) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in url blacklist");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
@@ -947,7 +954,8 @@ public final class SearchEvent {
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null && !f.isListed(page.url(), null)) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in contentcontrol");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
}
@@ -961,7 +969,8 @@ public final class SearchEvent {
((QueryParams.anymatch(pagetitle, this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())))) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: no match with query goal exclusion");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
@@ -971,13 +980,15 @@ public final class SearchEvent {
while (wi.hasNext()) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
}
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: url does not match index-of constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
// check location constraint
if ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) && (page.lat() == 0.0 || page.lon() == 0.0)) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: location constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
@@ -988,14 +999,16 @@ public final class SearchEvent {
double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
if (distance > this.query.radius) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: radius constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
}
// check Scanner
if (this.query.filterscannerfail && !Scanner.acceptURL(page.url())) {
this.misses.add(page.hash());
if (log.isFine()) log.logFine("dropped RWI: url not accepted by scanner");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}

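In the SearchEvent filter chain above, a hit that gets sorted out is no longer collected in the misses set; the drop reason is logged at fine level and the available counter the hit was booked under (local or remote RWI) is decremented instead. A minimal JDK-only sketch of that accounting pattern, with illustrative names rather than the YaCy SearchEvent:

import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

public class DropAccountingSketch {
    static final Logger log = Logger.getLogger("DropAccountingSketch");
    static final AtomicInteger localAvailable  = new AtomicInteger();
    static final AtomicInteger remoteAvailable = new AtomicInteger();

    // every filter that drops a result also removes it from the counter it was added to
    static void drop(boolean local, String reason) {
        if (log.isLoggable(Level.FINE)) log.fine("dropped RWI: " + reason);
        if (local) localAvailable.decrementAndGet(); else remoteAvailable.decrementAndGet();
    }

    public static void main(String[] args) {
        localAvailable.set(3);
        remoteAvailable.set(2);
        drop(true, "no match with urlMask");  // a local hit fails a post-ranking filter
        drop(false, "location constraint");   // a remote hit fails another filter
        System.out.println(localAvailable.get() + " " + remoteAvailable.get()); // prints 2 1
    }
}
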
@@ -66,11 +66,11 @@ public class ReferenceOrder {
this.language = language;
}
public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container, long maxtime) {
public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container, long maxtime, final boolean local) {
final LinkedBlockingQueue<WordReferenceVars> out = new LinkedBlockingQueue<WordReferenceVars>();
int threads = cores;
if (container.size() < 100) threads = 2;
final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime);
final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime, local);
distributor.start();
// return the resulting queue while the processing queues are still working
@@ -83,18 +83,20 @@ public class ReferenceOrder {
LinkedBlockingQueue<WordReferenceVars> out;
private final int threads;
private final long maxtime;
public NormalizeDistributor(final ReferenceContainer<WordReference> container, final LinkedBlockingQueue<WordReferenceVars> out, final int threads, final long maxtime) {
private final boolean local;
public NormalizeDistributor(final ReferenceContainer<WordReference> container, final LinkedBlockingQueue<WordReferenceVars> out, final int threads, final long maxtime, final boolean local) {
this.container = container;
this.out = out;
this.threads = threads;
this.maxtime = maxtime;
this.local = local;
}
@Override
public void run() {
// transform the reference container into a stream of parsed entries
final BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(this.container, this.maxtime);
final BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(this.container, this.maxtime, this.local);
// start the transformation threads
final Semaphore termination = new Semaphore(this.threads);
