corrected result counter

pull/1/head
orbiter 12 years ago
parent 2555542f7a
commit d74472f562

@ -549,15 +549,6 @@ public class IndexControlRWIs_p {
break; break;
} }
} }
final Iterator<byte[]> iter = theSearch.misses.iterator(); // iterates url hash strings
byte[] b;
while ( iter.hasNext() ) {
b = iter.next();
prop.put("genUrlList_urlList_" + i + "_urlExists", "0");
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxCount", i);
prop.putHTML("genUrlList_urlList_" + i + "_urlExists_urlhxValue", b);
i++;
}
prop.put("genUrlList_urlList", i); prop.put("genUrlList_urlList", i);
prop.putHTML("genUrlList_keyString", keystring); prop.putHTML("genUrlList_keyString", keystring);
prop.put("genUrlList_count", i); prop.put("genUrlList_count", i);

@ -733,12 +733,14 @@ public class yacysearch {
+ " - " + " - "
+ "local_rwi_available(" + theSearch.local_rwi_available.get() + "), " + "local_rwi_available(" + theSearch.local_rwi_available.get() + "), "
+ "local_rwi_stored(" + theSearch.local_rwi_stored.get() + "), " + "local_rwi_stored(" + theSearch.local_rwi_stored.get() + "), "
+ "remote_rwi_available(" + theSearch.remote_rwi_available.get() + "), "
+ "remote_rwi_stored(" + theSearch.remote_rwi_stored.get() + "), "
+ "remote_rwi_peerCount(" + theSearch.remote_rwi_peerCount.get() + "), "
+ "local_solr_available(" + theSearch.local_solr_available.get() + "), " + "local_solr_available(" + theSearch.local_solr_available.get() + "), "
+ "local_solr_stored(" + theSearch.local_solr_stored.get() + "), " + "local_solr_stored(" + theSearch.local_solr_stored.get() + "), "
+ "remote_available(" + theSearch.remote_available.get() + "), " + "remote_solr_available(" + theSearch.remote_solr_available.get() + "), "
+ "remote_stored(" + theSearch.remote_stored.get() + "), " + "remote_solr_stored(" + theSearch.remote_solr_stored.get() + "), "
+ "remote_peerCount(" + theSearch.remote_peerCount.get() + "), " + "remote_solr_peerCount(" + theSearch.remote_solr_peerCount.get() + "), "
+ "local_sortout(" + theSearch.misses.size() + "), "
+ (System.currentTimeMillis() - timestamp) + (System.currentTimeMillis() - timestamp)
+ " ms"); + " ms");
@ -827,10 +829,9 @@ public class yacysearch {
prop.put("num-results_totalcount", Formatter.number(theSearch.getResultCount())); prop.put("num-results_totalcount", Formatter.number(theSearch.getResultCount()));
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0"); prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0");
prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("num-results_globalresults_localMissCount", Formatter.number(theSearch.misses.size(), true)); prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_available.get(), true)); prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_stored.get(), true)); prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
// compose page navigation // compose page navigation
final StringBuilder resnav = new StringBuilder(200); final StringBuilder resnav = new StringBuilder(200);

@ -98,10 +98,9 @@ public class yacysearchitem {
prop.put("itemsperpage", Formatter.number(theSearch.query.itemsPerPage)); prop.put("itemsperpage", Formatter.number(theSearch.query.itemsPerPage));
prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true)); prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true)); prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true)); prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true)); prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString()); prop.put("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, ""); final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");

@ -38,10 +38,9 @@ public class yacysearchlatestinfo {
prop.put("itemsperpage", theSearch.query.itemsPerPage); prop.put("itemsperpage", theSearch.query.itemsPerPage);
prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true)); prop.put("totalcount", Formatter.number(theSearch.getResultCount(), true));
prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true)); prop.put("localResourceSize", Formatter.number(theSearch.local_rwi_available.get() + theSearch.local_solr_available.get(), true));
prop.put("localMissCount", Formatter.number(theSearch.misses.size(), true)); prop.put("remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("remoteResourceSize", Formatter.number(theSearch.remote_stored.get(), true)); prop.put("remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("remoteIndexCount", Formatter.number(theSearch.remote_available.get(), true)); prop.put("remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
prop.put("remotePeerCount", Formatter.number(theSearch.remote_peerCount.get(), true));
prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString()); prop.putJSON("navurlBase", QueryParams.navurlBase("html", theSearch.query, null).toString());
return prop; return prop;

@ -159,15 +159,31 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
} }
/**
 * Removes the head of {@code this.queue} and returns it. The removed element is also
 * added to {@code this.drained} when that collection is non-null and is either
 * unbounded ({@code maxsize == -1}) or still holds fewer than {@code maxsize} entries.
 * In this side-by-side rendering the left column removes the head with
 * {@code first()} followed by {@code remove(element)}; the right column does both
 * with a single {@code pollFirst()} call.
 * NOTE(review): the "Unsafe" suffix presumably means the caller must already have
 * acquired a permit from {@code this.enqueued} - confirm against the callers.
 * @return the element that was removed from the queue
 */
private Element<E> takeUnsafe() { private Element<E> takeUnsafe() {
final Element<E> element = this.queue.first(); final Element<E> element = this.queue.pollFirst();
assert element != null; assert element != null;
this.queue.remove(element);
if (this.drained != null && (this.maxsize == -1 || this.drained.size() < this.maxsize)) this.drained.add(element); if (this.drained != null && (this.maxsize == -1 || this.drained.size() < this.maxsize)) this.drained.add(element);
// sanity check: the queue must never hold fewer elements than there are permits available
assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); assert this.queue.size() >= this.enqueued.availablePermits() : "(take) queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
return element; return element;
} }
/**
* remove a drained element
* @param element
*/
/*
public void removeDrained(Element<E> element) {
if (element == null) return;
synchronized (this.drained) {
int p = this.drained.size() - 1;
if (this.drained.get(p) == element) {
this.drained.remove(p);
return;
}
}
this.drained.remove(element);
}
*/
/** /**
* return the element with the smallest weight, but do not remove it * return the element with the smallest weight, but do not remove it
* @return null if no element is on the queue or the head of the queue * @return null if no element is on the queue or the head of the queue

@ -36,7 +36,7 @@ public class CitationReferenceFactory implements ReferenceFactory<CitationRefere
} }
/**
 * Not supported by this factory: every call throws.
 * The right column of this rendering adds the {@code local} parameter, which is not
 * read by this implementation.
 * @throws UnsupportedOperationException on every call
 */
@Override @Override
public CitationReference produceFast(final CitationReference r) { public CitationReference produceFast(final CitationReference r, final boolean local) {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }

@ -40,7 +40,7 @@ import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.crawler.retrieval.Response; import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Condenser; import net.yacy.document.Condenser;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.Bitfield; import net.yacy.kelondro.util.Bitfield;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
@ -77,8 +77,8 @@ public class URIMetadataNode {
private long ranking = -1; // during generation of a search result this value is set private long ranking = -1; // during generation of a search result this value is set
private SolrDocument doc = null; private SolrDocument doc = null;
private String snippet = null; private String snippet = null;
private WordReference word = null; // this is only used if the url is transported via remote search requests private WordReferenceVars word = null; // this is only used if the url is transported via remote search requests
/**
 * Creates a node from a Solr input document by converting it with
 * {@code ClientUtils.toSolrDocument} and delegating to the {@code SolrDocument} constructor.
 * @param doc the input document to convert
 */
public URIMetadataNode(final SolrInputDocument doc) { public URIMetadataNode(final SolrInputDocument doc) {
this(ClientUtils.toSolrDocument(doc)); this(ClientUtils.toSolrDocument(doc));
} }
@ -98,18 +98,18 @@ public class URIMetadataNode {
} }
} }
/**
 * Creates a node from a Solr input document (converted with
 * {@code ClientUtils.toSolrDocument}) and records the word reference and ranking with it.
 * The left column types the word as {@code WordReference}, the right column as
 * {@code WordReferenceVars}.
 * @param doc the input document to convert
 * @param searchedWord the word reference stored in {@code this.word}
 * @param ranking the value stored in {@code this.ranking}
 */
public URIMetadataNode(final SolrInputDocument doc, final WordReference searchedWord, final long ranking) { public URIMetadataNode(final SolrInputDocument doc, final WordReferenceVars searchedWord, final long ranking) {
this(ClientUtils.toSolrDocument(doc)); this(ClientUtils.toSolrDocument(doc));
this.word = searchedWord; this.word = searchedWord;
this.ranking = ranking; this.ranking = ranking;
} }
/**
 * Creates a node from a Solr document via the single-argument constructor and records
 * the word reference and ranking with it.
 * The left column types the word as {@code WordReference}, the right column as
 * {@code WordReferenceVars}.
 * @param doc the Solr document passed to the single-argument constructor
 * @param searchedWord the word reference stored in {@code this.word}
 * @param ranking the value stored in {@code this.ranking}
 */
public URIMetadataNode(final SolrDocument doc, final WordReference searchedWord, final long ranking) { public URIMetadataNode(final SolrDocument doc, final WordReferenceVars searchedWord, final long ranking) {
this(doc); this(doc);
this.word = searchedWord; this.word = searchedWord;
this.ranking = ranking; this.ranking = ranking;
} }
/**
 * @return the Solr document held in {@code this.doc}
 */
public SolrDocument getDocument() { public SolrDocument getDocument() {
return this.doc; return this.doc;
} }
@ -292,7 +292,7 @@ public class URIMetadataNode {
return a.toArray(new String[a.size()]); return a.toArray(new String[a.size()]);
} }
/**
 * Returns the word reference held in {@code this.word}. The left column returns it as
 * {@code WordReference}, the right column as {@code WordReferenceVars}.
 * @return the stored word reference; the field is initialised to {@code null} and set
 *         only by the constructors that take a word argument
 */
public WordReference word() { public WordReferenceVars word() {
return this.word; return this.word;
} }
@ -429,6 +429,7 @@ public class URIMetadataNode {
return core.toString(); return core.toString();
} }
/*
private DigestURI getURL(CollectionSchema field) { private DigestURI getURL(CollectionSchema field) {
assert !field.isMultiValued(); assert !field.isMultiValued();
assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight; assert field.getType() == SolrType.string || field.getType() == SolrType.text_general || field.getType() == SolrType.text_en_splitting_tight;
@ -440,7 +441,8 @@ public class URIMetadataNode {
return null; return null;
} }
} }
*/
private int getInt(CollectionSchema field) { private int getInt(CollectionSchema field) {
assert !field.isMultiValued(); assert !field.isMultiValued();
assert field.getType() == SolrType.num_integer; assert field.getType() == SolrType.num_integer;

@ -173,7 +173,7 @@ public class URIMetadataRow {
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", "")); this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""));
this.word = null; this.word = null;
if (prop.containsKey("wi")) { if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", "")))); this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))), false);
} }
this.comp = null; this.comp = null;
} }

@ -52,9 +52,9 @@ public class WordReferenceFactory implements ReferenceFactory<WordReference>, Se
} }
/**
 * Returns a {@code WordReferenceVars} view of the given reference.
 * An argument that already is a {@code WordReferenceVars} is returned as-is; anything
 * else is wrapped in a new {@code WordReferenceVars}. In the right column the
 * {@code local} flag is passed to that new wrapper.
 * NOTE(review): when {@code r} is already a {@code WordReferenceVars} the {@code local}
 * argument is ignored and the instance keeps whatever flag it was built with.
 * @param r the reference to convert
 * @return {@code r} itself or a new wrapper around it
 */
@Override @Override
public WordReference produceFast(final WordReference r) { public WordReference produceFast(final WordReference r, final boolean local) {
if (r instanceof WordReferenceVars) return r; if (r instanceof WordReferenceVars) return r;
return new WordReferenceVars(r); return new WordReferenceVars(r, local);
} }
@Override @Override

@ -70,8 +70,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private int virtualAge; private int virtualAge;
private final Queue<Integer> positions; private final Queue<Integer> positions;
private double termFrequency; private double termFrequency;
private final boolean local;
public WordReferenceVars(final URIMetadataRow md) { public WordReferenceVars(final URIMetadataRow md, final boolean local) {
this.language = md.language(); this.language = md.language();
this.flags = md.flags(); this.flags = md.flags();
this.lastModified = md.moddate().getTime(); this.lastModified = md.moddate().getTime();
@ -93,6 +94,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = 1; this.wordsintext = 1;
this.wordsintitle = 1; this.wordsintitle = 1;
this.termFrequency = 1; this.termFrequency = 1;
this.local = local;
} }
public WordReferenceVars( public WordReferenceVars(
@ -135,9 +137,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = wordcount; this.wordsintext = wordcount;
this.wordsintitle = titleLength; this.wordsintitle = titleLength;
this.termFrequency = termfrequency; this.termFrequency = termfrequency;
this.local = true;
} }
public WordReferenceVars(final WordReference e) { public WordReferenceVars(final WordReference e, boolean local) {
this.flags = e.flags(); this.flags = e.flags();
//this.freshUntil = e.freshUntil(); //this.freshUntil = e.freshUntil();
this.lastModified = e.lastModified(); this.lastModified = e.lastModified();
@ -158,6 +161,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = e.wordsintext(); this.wordsintext = e.wordsintext();
this.wordsintitle = e.wordsintitle(); this.wordsintitle = e.wordsintitle();
this.termFrequency = e.termFrequency(); this.termFrequency = e.termFrequency();
this.local = local;
} }
/** /**
@ -182,6 +186,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.wordsintext = 0; this.wordsintext = 0;
this.wordsintitle = 0; this.wordsintitle = 0;
this.termFrequency = 0.0; this.termFrequency = 0.0;
this.local = true;
} }
@Override @Override
@ -349,6 +354,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
if (this.termFrequency == 0.0) this.termFrequency = (((double) hitcount()) / ((double) (wordsintext() + wordsintitle() + 1))); if (this.termFrequency == 0.0) this.termFrequency = (((double) hitcount()) / ((double) (wordsintext() + wordsintitle() + 1)));
return this.termFrequency; return this.termFrequency;
} }
/**
 * Returns the {@code local} flag this instance was constructed with.
 * NOTE(review): presumably tells whether the reference came from the local index
 * rather than from a remote peer - confirm against the callers that pass the flag.
 * @return the value of the final {@code local} field
 */
public boolean local() {
return this.local;
}
public final void min(final WordReferenceVars other) { public final void min(final WordReferenceVars other) {
if (other == null) return; if (other == null) return;
@ -448,13 +457,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
* @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned * @return a blocking queue filled with WordReferenceVars that is still filled when the object is returned
*/ */
public static BlockingQueue<WordReferenceVars> transform(final ReferenceContainer<WordReference> container, final long maxtime) { public static BlockingQueue<WordReferenceVars> transform(final ReferenceContainer<WordReference> container, final long maxtime, final boolean local) {
final LinkedBlockingQueue<WordReferenceVars> vars = new LinkedBlockingQueue<WordReferenceVars>(); final LinkedBlockingQueue<WordReferenceVars> vars = new LinkedBlockingQueue<WordReferenceVars>();
if (container.size() <= 100) { if (container.size() <= 100) {
// transform without concurrency to omit thread creation overhead // transform without concurrency to omit thread creation overhead
for (final Row.Entry entry: container) { for (final Row.Entry entry: container) {
try { try {
vars.put(new WordReferenceVars(new WordReferenceRow(entry))); vars.put(new WordReferenceVars(new WordReferenceRow(entry), local));
} catch (final InterruptedException e) {} } catch (final InterruptedException e) {}
} }
try { try {
@ -462,7 +471,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
} catch (final InterruptedException e) {} } catch (final InterruptedException e) {}
return vars; return vars;
} }
final Thread distributor = new TransformDistributor(container, vars, maxtime); final Thread distributor = new TransformDistributor(container, vars, maxtime, local);
distributor.start(); distributor.start();
// return the resulting queue while the processing queues are still working // return the resulting queue while the processing queues are still working
@ -474,11 +483,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private ReferenceContainer<WordReference> container; private ReferenceContainer<WordReference> container;
private BlockingQueue<WordReferenceVars> out; private BlockingQueue<WordReferenceVars> out;
private long maxtime; private long maxtime;
private final boolean local;
/**
 * Stores the arguments for later use by the thread; no work is started here.
 * The right column additionally takes and stores the {@code local} flag.
 * @param container the reference container whose entries are to be transformed
 * @param out the queue that receives the resulting {@code WordReferenceVars}
 * @param maxtime time budget added to the current time in milliseconds to form the timeout
 * @param local flag handed on to each {@code TransformWorker} (right column only)
 */
private TransformDistributor(final ReferenceContainer<WordReference> container, final BlockingQueue<WordReferenceVars> out, final long maxtime) { private TransformDistributor(final ReferenceContainer<WordReference> container, final BlockingQueue<WordReferenceVars> out, final long maxtime, final boolean local) {
this.container = container; this.container = container;
this.out = out; this.out = out;
this.maxtime = maxtime; this.maxtime = maxtime;
this.local = local;
} }
@Override @Override
@ -487,7 +497,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int cores0 = Math.min(cores, this.container.size() / 100) + 1; final int cores0 = Math.min(cores, this.container.size() / 100) + 1;
final TransformWorker[] worker = new TransformWorker[cores0]; final TransformWorker[] worker = new TransformWorker[cores0];
for (int i = 0; i < cores0; i++) { for (int i = 0; i < cores0; i++) {
worker[i] = new TransformWorker(this.out, this.maxtime); worker[i] = new TransformWorker(this.out, this.maxtime, this.local);
worker[i].start(); worker[i].start();
} }
long timeout = System.currentTimeMillis() + this.maxtime; long timeout = System.currentTimeMillis() + this.maxtime;
@ -525,11 +535,13 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private BlockingQueue<Row.Entry> in; private BlockingQueue<Row.Entry> in;
private BlockingQueue<WordReferenceVars> out; private BlockingQueue<WordReferenceVars> out;
private long maxtime; private long maxtime;
private final boolean local;
/**
 * Creates a worker with its own, initially empty input queue and stores the remaining
 * arguments. The right column additionally takes and stores the {@code local} flag.
 * @param out the queue that receives the resulting {@code WordReferenceVars}
 * @param maxtime time budget added to the current time in milliseconds to form the timeout
 * @param local flag passed to every {@code WordReferenceVars} this worker creates (right column only)
 */
private TransformWorker(final BlockingQueue<WordReferenceVars> out, final long maxtime) { private TransformWorker(final BlockingQueue<WordReferenceVars> out, final long maxtime, final boolean local) {
this.in = new LinkedBlockingQueue<Row.Entry>(); this.in = new LinkedBlockingQueue<Row.Entry>();
this.out = out; this.out = out;
this.maxtime = maxtime; this.maxtime = maxtime;
this.local = local;
} }
private void add(final Row.Entry entry) { private void add(final Row.Entry entry) {
@ -545,7 +557,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
long timeout = System.currentTimeMillis() + this.maxtime; long timeout = System.currentTimeMillis() + this.maxtime;
try { try {
while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) { while ((entry = this.in.take()) != WordReferenceRow.poisonRowEntry) {
this.out.put(new WordReferenceVars(new WordReferenceRow(entry))); this.out.put(new WordReferenceVars(new WordReferenceRow(entry), local));
if (System.currentTimeMillis() > timeout) { if (System.currentTimeMillis() > timeout) {
Log.logWarning("TransformWorker", "normalization of row entries from row to vars ended with timeout = " + this.maxtime); Log.logWarning("TransformWorker", "normalization of row entries from row to vars ended with timeout = " + this.maxtime);
break; break;

@ -432,7 +432,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
assert (ie2.urlhash().length == keylength) : "ie1.urlHash() = " + ASCII.String(ie2.urlhash()); assert (ie2.urlhash().length == keylength) : "ie1.urlHash() = " + ASCII.String(ie2.urlhash());
// this is a hit. Calculate word distance: // this is a hit. Calculate word distance:
ie1 = factory.produceFast(ie2); ie1 = factory.produceFast(ie2, true);
ie1.join(ie2); ie1.join(ie2);
if (ie1.distance() <= maxDistance) conj.add(ie1); if (ie1.distance() <= maxDistance) conj.add(ie1);
} }
@ -472,7 +472,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
if (e2.hasNext()) ie2 = e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1 = factory.produceFast(ie1); ie1 = factory.produceFast(ie1, true);
ie1.join(ie2); ie1.join(ie2);
if (ie1.distance() <= maxDistance) conj.add(ie1); if (ie1.distance() <= maxDistance) conj.add(ie1);
if (e1.hasNext()) ie1 = e1.next(); else break; if (e1.hasNext()) ie1 = e1.next(); else break;
@ -554,7 +554,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
if (e2.hasNext()) ie2 = e2.next(); else break; if (e2.hasNext()) ie2 = e2.next(); else break;
} else { } else {
// we have found the same urls in different searches! // we have found the same urls in different searches!
ie1 = factory.produceFast(ie1); ie1 = factory.produceFast(ie1, true);
ie1.join(ie2); ie1.join(ie2);
e1.remove(); e1.remove();
if (e1.hasNext()) ie1 = e1.next(); else break; if (e1.hasNext()) ie1 = e1.next(); else break;

@ -34,6 +34,6 @@ public interface ReferenceFactory<ReferenceType extends Reference> {
public ReferenceType produceSlow(Row.Entry e); public ReferenceType produceSlow(Row.Entry e);
public ReferenceType produceFast(ReferenceType e); public ReferenceType produceFast(ReferenceType e, final boolean local);
} }

@ -377,7 +377,7 @@ public class WebStructureGraph {
} }
/**
 * Returns the given host reference unchanged. The {@code local} parameter added in the
 * right column is not read by this implementation.
 * @param e the reference to return
 * @return {@code e} itself
 */
@Override @Override
public HostReference produceFast(final HostReference e) { public HostReference produceFast(final HostReference e, final boolean local) {
return e; return e;
} }

@ -54,6 +54,7 @@ import net.yacy.cora.federate.solr.instance.ShardInstance;
import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.ZIPReader; import net.yacy.cora.storage.ZIPReader;
import net.yacy.cora.storage.ZIPWriter; import net.yacy.cora.storage.ZIPWriter;
import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.CharacterCoding;
@ -61,6 +62,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Cache; import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.Index; import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
@ -315,9 +317,13 @@ public final class Fulltext {
} }
} }
/**
 * Looks up the metadata node for a word reference.
 * Left column: takes the reference and its weight directly.
 * Right column: takes a queue element, reads the reference with {@code getElement()} and
 * the weight with {@code getWeight()}, and returns {@code null} when the element is null.
 * Both columns return {@code null} when the reference is null and otherwise delegate to
 * {@code getMetadata(wre.urlhash(), wre, weight)}.
 * @return the node produced by the delegate call, or {@code null} as described above
 */
public URIMetadataNode getMetadata(WordReference wre, long weight) { public URIMetadataNode getMetadata(WeakPriorityBlockingQueue.Element<WordReferenceVars> element) {
if (element == null) return null;
WordReferenceVars wre = element.getElement();
long weight = element.getWeight();
if (wre == null) return null; // all time was already wasted in takeRWI to get another element if (wre == null) return null; // all time was already wasted in takeRWI to get another element
return getMetadata(wre.urlhash(), wre, weight); URIMetadataNode node = getMetadata(wre.urlhash(), wre, weight);
return node;
} }
public URIMetadataNode getMetadata(final byte[] urlHash) { public URIMetadataNode getMetadata(final byte[] urlHash) {
@ -325,7 +331,7 @@ public final class Fulltext {
return getMetadata(urlHash, null, 0); return getMetadata(urlHash, null, 0);
} }
private URIMetadataNode getMetadata(final byte[] urlHash, WordReference wre, long weight) { private URIMetadataNode getMetadata(final byte[] urlHash, WordReferenceVars wre, long weight) {
// get the metadata from Solr // get the metadata from Solr
try { try {

@ -28,16 +28,13 @@ package net.yacy.search.query;
import java.text.ParseException; import java.text.ParseException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.ConcurrentModificationException; import java.util.ConcurrentModificationException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -98,7 +95,7 @@ import net.yacy.search.snippet.TextSnippet.ResultClass;
public final class SearchEvent { public final class SearchEvent {
private static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit' private static final int max_results_rwi = 3000;
private static long noRobinsonLocalRWISearch = 0; private static long noRobinsonLocalRWISearch = 0;
static { static {
@ -160,15 +157,17 @@ public final class SearchEvent {
// the following values are filled during the search process as statistics for the search // the following values are filled during the search process as statistics for the search
public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index
public final AtomicInteger local_rwi_stored; // the number of existing hits by the local search in rwi index public final AtomicInteger local_rwi_stored; // the number of existing hits by the local search in rwi index
public final AtomicInteger remote_rwi_available; // the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_rwi_stored; // the number of existing hits at remote site
public final AtomicInteger remote_rwi_peerCount; // the number of peers which contributed to the remote search result
public final AtomicInteger local_solr_available; // the number of hits generated/ranked by the local search in solr public final AtomicInteger local_solr_available; // the number of hits generated/ranked by the local search in solr
public final AtomicInteger local_solr_stored; // the number of existing hits by the local search in solr public final AtomicInteger local_solr_stored; // the number of existing hits by the local search in solr
public final AtomicInteger remote_available; // the number of hits imported from remote peers (rwi/solr mixed) public final AtomicInteger remote_solr_available;// the number of hits imported from remote peers (rwi/solr mixed)
public final AtomicInteger remote_stored; // the number of existing hits at remote site public final AtomicInteger remote_solr_stored; // the number of existing hits at remote site
public final AtomicInteger remote_peerCount; // the number of peers which contributed to the remote search result public final AtomicInteger remote_solr_peerCount;// the number of peers which contributed to the remote search result
public final SortedSet<byte[]> misses; // url hashes that had been sorted out because of constraints in postranking
/**
 * Returns the result count reported for this search event.
 * Left column: the sum of {@code rwiStack.sizeQueue()}, {@code nodeStack.sizeQueue()}
 * and {@code resultList.sizeAvailable()}.
 * Right column: {@code local_rwi_available} plus {@code local_solr_stored}.
 * NOTE(review): the right column adds an "available" counter to a "stored" counter and
 * leaves out the remote_* counters - confirm that this combination is intended.
 * @return the computed count
 */
public int getResultCount() { public int getResultCount() {
return this.rwiStack.sizeQueue() + this.nodeStack.sizeQueue() + this.resultList.sizeAvailable(); return this.local_rwi_available.get() + local_solr_stored.get();
} }
protected SearchEvent( protected SearchEvent(
@ -214,14 +213,16 @@ public final class SearchEvent {
this.IAneardhthash = null; this.IAneardhthash = null;
this.localSearchThread = null; this.localSearchThread = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && peers.mySeed().getFlagAcceptRemoteIndex())); this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && peers.mySeed().getFlagAcceptRemoteIndex()));
this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering this.local_rwi_available = new AtomicInteger(0); // the number of results in the local peer after filtering
this.local_rwi_stored = new AtomicInteger(0); this.local_rwi_stored = new AtomicInteger(0);
this.local_solr_available= new AtomicInteger(0); this.local_solr_available = new AtomicInteger(0);
this.local_solr_stored = new AtomicInteger(0); this.local_solr_stored = new AtomicInteger(0);
this.remote_stored = new AtomicInteger(0); this.remote_rwi_stored = new AtomicInteger(0);
this.remote_available = new AtomicInteger(0); // the number of result contributions from all the remote peers this.remote_rwi_available = new AtomicInteger(0); // the number of result contributions from all the remote dht peers
this.remote_peerCount = new AtomicInteger(0); // the number of remote peers that have contributed this.remote_rwi_peerCount = new AtomicInteger(0); // the number of remote dht peers that have contributed
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder)); this.remote_solr_stored = new AtomicInteger(0);
this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers
this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed
final long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
// do a soft commit for fresh results // do a soft commit for fresh results
@ -233,8 +234,7 @@ public final class SearchEvent {
this.localSearchInclusion = null; this.localSearchInclusion = null;
this.ref = new ConcurrentScoreMap<String>(); this.ref = new ConcurrentScoreMap<String>();
this.maxtime = query.maxtime; this.maxtime = query.maxtime;
int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation; this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>(); this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.flagcount = new int[32]; this.flagcount = new int[32];
for ( int i = 0; i < 32; i++ ) { for ( int i = 0; i < 32; i++ ) {
@ -445,13 +445,13 @@ public final class SearchEvent {
this.local_rwi_stored.addAndGet(fullResource); this.local_rwi_stored.addAndGet(fullResource);
} else { } else {
assert fullResource >= 0 : "fullResource = " + fullResource; assert fullResource >= 0 : "fullResource = " + fullResource;
this.remote_stored.addAndGet(fullResource); this.remote_rwi_stored.addAndGet(fullResource);
this.remote_peerCount.incrementAndGet(); this.remote_rwi_peerCount.incrementAndGet();
} }
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
// normalize entries // normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime); final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
int is = index.size(); int is = index.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch( EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
this.query.id(true), this.query.id(true),
@ -530,7 +530,7 @@ public final class SearchEvent {
} }
} }
// increase counter for statistics // increase counter for statistics
if (local) this.local_rwi_available.incrementAndGet(); else this.remote_available.incrementAndGet(); if (local) this.local_rwi_available.incrementAndGet(); else this.remote_rwi_available.incrementAndGet();
} }
if (System.currentTimeMillis() >= timeout) Log.logWarning("SearchEvent", "rwi normalization ended with timeout = " + maxtime); if (System.currentTimeMillis() >= timeout) Log.logWarning("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
@ -650,8 +650,8 @@ public final class SearchEvent {
this.local_solr_stored.set(fullResource); this.local_solr_stored.set(fullResource);
} else { } else {
assert fullResource >= 0 : "fullResource = " + fullResource; assert fullResource >= 0 : "fullResource = " + fullResource;
this.remote_stored.addAndGet(fullResource); this.remote_solr_stored.addAndGet(fullResource);
this.remote_peerCount.incrementAndGet(); this.remote_solr_peerCount.incrementAndGet();
} }
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
@ -785,7 +785,7 @@ public final class SearchEvent {
} }
} }
// increase counter for statistics // increase counter for statistics
if (local) this.local_solr_available.incrementAndGet(); else this.remote_available.incrementAndGet(); if (local) this.local_solr_available.incrementAndGet(); else this.remote_solr_available.incrementAndGet();
} }
} catch ( final SpaceExceededException e ) { } catch ( final SpaceExceededException e ) {
} }
@ -819,7 +819,7 @@ public final class SearchEvent {
rwi = this.rwiStack.poll(); rwi = this.rwiStack.poll();
if (rwi == null) return null; if (rwi == null) return null;
if (!skipDoubleDom) { if (!skipDoubleDom) {
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight()); URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) continue pollloop; if (node == null) continue pollloop;
return node; return node;
} }
@ -832,9 +832,9 @@ public final class SearchEvent {
m = this.doubleDomCache.get(hosthash); m = this.doubleDomCache.get(hosthash);
if (m == null) { if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned // first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>(this.query.snippetCacheStrategy == null || this.query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation, false); m = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
this.doubleDomCache.put(hosthash, m); this.doubleDomCache.put(hosthash, m);
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi.getElement(), rwi.getWeight()); URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) continue pollloop; if (node == null) continue pollloop;
return node; return node;
} }
@ -894,8 +894,12 @@ public final class SearchEvent {
//Log.logWarning("SearchEvent", "bestEntry == null (2)"); //Log.logWarning("SearchEvent", "bestEntry == null (2)");
return null; return null;
} }
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry.getElement(), bestEntry.getWeight()); URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(bestEntry);
if (node == null) continue mainloop; if (node == null) {
if (bestEntry.getElement().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
if (log.isFine()) log.logFine("dropped RWI: hash not in metadata");
continue mainloop;
}
return node; return node;
} }
} }
@ -916,14 +920,15 @@ public final class SearchEvent {
while ((page = pullOneRWI(skipDoubleDom)) != null) { while ((page = pullOneRWI(skipDoubleDom)) != null) {
if (!this.query.urlMask_isCatchall && !page.matches(this.query.urlMask)) { if (!this.query.urlMask_isCatchall && !page.matches(this.query.urlMask)) {
// check url mask if (log.isFine()) log.logFine("dropped RWI: no match with urlMask");
this.misses.add(page.hash()); if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
// check for more errors // check for more errors
if (page.url() == null) { if (page.url() == null) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: url == null");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; // rare case where the url is corrupted continue; // rare case where the url is corrupted
} }
@ -933,13 +938,15 @@ public final class SearchEvent {
(this.query.contentdom == Classification.ContentDomain.AUDIO && page.url().getContentDomain() != Classification.ContentDomain.AUDIO) || (this.query.contentdom == Classification.ContentDomain.AUDIO && page.url().getContentDomain() != Classification.ContentDomain.AUDIO) ||
(this.query.contentdom == Classification.ContentDomain.VIDEO && page.url().getContentDomain() != Classification.ContentDomain.VIDEO) || (this.query.contentdom == Classification.ContentDomain.VIDEO && page.url().getContentDomain() != Classification.ContentDomain.VIDEO) ||
(this.query.contentdom == Classification.ContentDomain.APP && page.url().getContentDomain() != Classification.ContentDomain.APP)) && this.query.urlMask_isCatchall) { (this.query.contentdom == Classification.ContentDomain.APP && page.url().getContentDomain() != Classification.ContentDomain.APP)) && this.query.urlMask_isCatchall) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: wrong contentdom = " + this.query.contentdom + ", domain = " + page.url().getContentDomain());
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
// Check for blacklist // Check for blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page)) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in url blacklist");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
@ -947,7 +954,8 @@ public final class SearchEvent {
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) { if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null && !f.isListed(page.url(), null)) { if (f != null && !f.isListed(page.url(), null)) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: url is blacklisted in contentcontrol");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
} }
@ -961,7 +969,8 @@ public final class SearchEvent {
((QueryParams.anymatch(pagetitle, this.query.getQueryGoal().getExcludeHashes())) ((QueryParams.anymatch(pagetitle, this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageurl.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())) || (QueryParams.anymatch(pageurl.toLowerCase(), this.query.getQueryGoal().getExcludeHashes()))
|| (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())))) { || (QueryParams.anymatch(pageauthor.toLowerCase(), this.query.getQueryGoal().getExcludeHashes())))) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: no match with query goal exclusion");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
@ -971,13 +980,15 @@ public final class SearchEvent {
while (wi.hasNext()) { while (wi.hasNext()) {
this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash()); this.query.getSegment().termIndex().removeDelayed(wi.next(), page.hash());
} }
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: url does not match index-of constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
// check location constraint // check location constraint
if ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) && (page.lat() == 0.0 || page.lon() == 0.0)) { if ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_haslocation)) && (page.lat() == 0.0 || page.lon() == 0.0)) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: location constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
@ -988,14 +999,16 @@ public final class SearchEvent {
double lonDelta = this.query.lon - lon; double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
if (distance > this.query.radius) { if (distance > this.query.radius) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: radius constraint");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }
} }
// check Scanner // check Scanner
if (this.query.filterscannerfail && !Scanner.acceptURL(page.url())) { if (this.query.filterscannerfail && !Scanner.acceptURL(page.url())) {
this.misses.add(page.hash()); if (log.isFine()) log.logFine("dropped RWI: url not accepted by scanner");
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue; continue;
} }

@ -66,11 +66,11 @@ public class ReferenceOrder {
this.language = language; this.language = language;
} }
public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container, long maxtime) { public BlockingQueue<WordReferenceVars> normalizeWith(final ReferenceContainer<WordReference> container, long maxtime, final boolean local) {
final LinkedBlockingQueue<WordReferenceVars> out = new LinkedBlockingQueue<WordReferenceVars>(); final LinkedBlockingQueue<WordReferenceVars> out = new LinkedBlockingQueue<WordReferenceVars>();
int threads = cores; int threads = cores;
if (container.size() < 100) threads = 2; if (container.size() < 100) threads = 2;
final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime); final Thread distributor = new NormalizeDistributor(container, out, threads, maxtime, local);
distributor.start(); distributor.start();
// return the resulting queue while the processing queues are still working // return the resulting queue while the processing queues are still working
@ -83,18 +83,20 @@ public class ReferenceOrder {
LinkedBlockingQueue<WordReferenceVars> out; LinkedBlockingQueue<WordReferenceVars> out;
private final int threads; private final int threads;
private final long maxtime; private final long maxtime;
private final boolean local;
public NormalizeDistributor(final ReferenceContainer<WordReference> container, final LinkedBlockingQueue<WordReferenceVars> out, final int threads, final long maxtime) {
public NormalizeDistributor(final ReferenceContainer<WordReference> container, final LinkedBlockingQueue<WordReferenceVars> out, final int threads, final long maxtime, final boolean local) {
this.container = container; this.container = container;
this.out = out; this.out = out;
this.threads = threads; this.threads = threads;
this.maxtime = maxtime; this.maxtime = maxtime;
this.local = local;
} }
@Override @Override
public void run() { public void run() {
// transform the reference container into a stream of parsed entries // transform the reference container into a stream of parsed entries
final BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(this.container, this.maxtime); final BlockingQueue<WordReferenceVars> vars = WordReferenceVars.transform(this.container, this.maxtime, this.local);
// start the transformation threads // start the transformation threads
final Semaphore termination = new Semaphore(this.threads); final Semaphore termination = new Semaphore(this.threads);

Loading…
Cancel
Save