Use the Solr search index to search concurrently within Solr and the

RWIs during local search requests.
pull/1/head
Michael Peter Christen 13 years ago
parent 6197caf698
commit 395b78a0d8

@ -33,7 +33,10 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
@ -45,6 +48,8 @@ import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ByteArray; import net.yacy.kelondro.util.ByteArray;
import org.apache.solr.common.SolrDocument;
public class WordReferenceVars extends AbstractReference implements WordReference, Reference, Cloneable, Comparable<WordReferenceVars>, Comparator<WordReferenceVars> { public class WordReferenceVars extends AbstractReference implements WordReference, Reference, Cloneable, Comparable<WordReferenceVars>, Comparator<WordReferenceVars> {
@ -55,12 +60,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private static int cores = Runtime.getRuntime().availableProcessors(); private static int cores = Runtime.getRuntime().availableProcessors();
public static final byte[] default_language = UTF8.getBytes("uk"); public static final byte[] default_language = UTF8.getBytes("uk");
public Bitfield flags; public final Bitfield flags;
public long lastModified; public long lastModified;
public byte[] language; public final byte[] language;
public byte[] urlHash; public final byte[] urlHash;
private String hostHash = null; private String hostHash = null;
public char type; public final char type;
public int hitcount, llocal, lother, phrasesintext, public int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase, posinphrase, posofphrase,
urlcomps, urllength, urlcomps, urllength,
@ -69,6 +74,31 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private final Queue<Integer> positions; private final Queue<Integer> positions;
public double termFrequency; public double termFrequency;
/**
 * Builds a word reference from a solr result document.
 * The document is wrapped into a URIMetadataNode and the values that the
 * metadata provides are copied into this reference. Attributes that cannot be
 * derived from the metadata easily are set to placeholder values.
 *
 * @param doc the solr document that describes the referenced url
 */
public WordReferenceVars(final SolrDocument doc) {
    final URIMetadata metadata = new URIMetadataNode(doc);

    // attributes that are taken directly from the document metadata
    this.language = metadata.language();
    this.flags = metadata.flags();
    this.lastModified = metadata.moddate().getTime();
    this.urlHash = metadata.hash();
    this.type = metadata.doctype();
    this.llocal = metadata.llocal();
    this.lother = metadata.lother();

    // a single position entry with the value 1
    this.positions = new LinkedBlockingQueue<Integer>();
    this.positions.add(1);

    // url-related attributes are derived from the normal form of the url
    final String normalizedUrl = metadata.url().toNormalform(true, false);
    this.urlcomps = MultiProtocolURI.urlComps(normalizedUrl).length;
    this.urllength = normalizedUrl.length();

    // the virtual age is not known yet and will be computed later
    this.virtualAge = -1;

    // placeholder values: these attributes cannot be computed here easily
    this.hitcount = 1;
    this.phrasesintext = 1;
    this.posinphrase = 1;
    this.posofphrase = 1;
    this.wordsintext = 1;
    this.wordsintitle = 1;
    this.termFrequency = 1;
}
public WordReferenceVars( public WordReferenceVars(
final byte[] urlHash, final byte[] urlHash,
final int urlLength, // byte-length of complete URL final int urlLength, // byte-length of complete URL

@ -323,6 +323,15 @@ public final class QueryParams {
return this.domType == Searchdom.LOCAL; return this.domType == Searchdom.LOCAL;
} }
/**
 * Composes a solr query string for the field text_t from the include and
 * exclude words of this query.
 * Every include word is prefixed with '+' and every exclude word with "+-";
 * the leading '+' of the concatenation is removed before the field name is
 * put in front.
 *
 * @return the query string, or null if there are no include words
 */
public String solrQuery() {
    if (this.query_include_words == null || this.query_include_words.size() == 0) {
        return null;
    }

    final StringBuilder terms = new StringBuilder(80);
    for (final String include : this.query_include_words) {
        terms.append('+').append(include);
    }
    for (final String exclude : this.query_exclude_words) {
        terms.append("+-").append(exclude);
    }

    if (terms.length() == 0) {
        return null;
    }
    // cut off the leading '+' and prepend the name of the search field
    final String withoutLeadingPlus = terms.substring(1, terms.length());
    return "text_t:" + withoutLeadingPlus;
}
public static HandleSet hashes2Set(final String query) { public static HandleSet hashes2Set(final String query) {
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0); final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
if (query != null) { if (query != null) {

@ -26,6 +26,7 @@
package net.yacy.search.query; package net.yacy.search.query;
import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import java.util.ConcurrentModificationException; import java.util.ConcurrentModificationException;
import java.util.HashMap; import java.util.HashMap;
@ -74,6 +75,10 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.ResultEntry;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Resource;
@ -193,6 +198,30 @@ public final class RWIProcess extends Thread
@Override @Override
public void run() { public void run() {
// start a concurrent solr search
if (this.query.query_include_words != null) {
    // the solr lookup gets its own thread so that it does not block the
    // remainder of this run() method
    Thread solrSearch = new Thread() {
        @Override
        public void run() {
            Thread.currentThread().setName("SearchEvent.solrSearch");
            final String solrQuery = RWIProcess.this.query.solrQuery();
            // solrQuery() returns null if there is nothing to search for
            // (e.g. an empty include word set); never pass that on to solr
            if (solrQuery == null) {
                return;
            }
            try {
                final ReferenceContainer<WordReference> wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
                // fetch the first 20 documents that match the query
                final SolrDocumentList sdl = RWIProcess.this.query.getSegment().getSolr().query(solrQuery, 0, 20);
                for (final SolrDocument d : sdl) {
                    try {
                        wr.add(new WordReferenceVars(d));
                    } catch (final SpaceExceededException e) {
                        // best effort: the document is skipped, but the loss is reported
                        Log.logWarning("SearchEvent", "could not add solr hit to reference container: " + e.getMessage(), e);
                    }
                }
                Log.logInfo("SearchEvent", "added " + wr.size() + " hits from solr to ranking process");
                // hand the collected references over to the ranking process
                RWIProcess.this.add(wr, true, "embedded solr", sdl.size(), 60000);
            } catch (final SolrException e) {
                // a failing solr search must not break the search event, but must be visible
                Log.logWarning("SearchEvent", "solr search failed for query '" + solrQuery + "': " + e.getMessage(), e);
            } catch (final IOException e) {
                Log.logWarning("SearchEvent", "solr search failed for query '" + solrQuery + "': " + e.getMessage(), e);
            }
        }
    };
    solrSearch.start();
}
// do a search // do a search
oneFeederStarted(); oneFeederStarted();

Loading…
Cancel
Save