diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
index 91910542d..aa1117e06 100644
--- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java
+++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
@@ -33,7 +33,10 @@ import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
+import net.yacy.kelondro.data.meta.URIMetadata;
+import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.kelondro.index.Row;
 import net.yacy.kelondro.index.Row.Entry;
 import net.yacy.kelondro.logging.Log;
@@ -45,6 +48,8 @@ import net.yacy.kelondro.rwi.Reference;
 import net.yacy.kelondro.rwi.ReferenceContainer;
 import net.yacy.kelondro.util.ByteArray;
 
+import org.apache.solr.common.SolrDocument;
+
 public class WordReferenceVars extends AbstractReference implements WordReference, Reference, Cloneable, Comparable, Comparator {
 
 
@@ -55,12 +60,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
     private static int cores = Runtime.getRuntime().availableProcessors();
     public static final byte[] default_language = UTF8.getBytes("uk");
 
-    public Bitfield flags;
+    public final Bitfield flags;
     public long lastModified;
-    public byte[] language;
-    public byte[] urlHash;
+    public final byte[] language;
+    public final byte[] urlHash;
     private String hostHash = null;
-    public char type;
+    public final char type;
     public int hitcount, llocal, lother, phrasesintext,
                posinphrase, posofphrase,
                urlcomps, urllength,
@@ -69,6 +74,41 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
     private final Queue positions;
     public double termFrequency;
 
+    /**
+     * Creates a word reference from a solr search result document.
+     * The document is wrapped into a URIMetadataNode; language, flags, modification date,
+     * url hash, doctype, llocal and lother are copied from that metadata and the url
+     * component count and url length are computed from the normal form of its url.
+     * The word statistics (hit count, phrase and word positions/counts, term frequency)
+     * are not computed here and are set to the dummy value 1; virtualAge is set to -1.
+     *
+     * @param doc the solr document to create the reference from
+     */
+    public WordReferenceVars(final SolrDocument doc) {
+        URIMetadata md = new URIMetadataNode(doc);
+        this.language = md.language();
+        this.flags = md.flags();
+        this.lastModified = md.moddate().getTime();
+        this.urlHash = md.hash();
+        this.type = md.doctype();
+        this.llocal = md.llocal();
+        this.lother = md.lother();
+        this.positions = new LinkedBlockingQueue();
+        this.positions.add(1); // single dummy position
+        String urlNormalform = md.url().toNormalform(true, false);
+        this.urlcomps = MultiProtocolURI.urlComps(urlNormalform).length;
+        this.urllength = urlNormalform.length();
+        this.virtualAge = -1; // compute that later
+        // the following fields cannot easily be computed here and are just filled with dummy values
+        this.phrasesintext = 1;
+        this.hitcount = 1;
+        this.posinphrase = 1;
+        this.posofphrase = 1;
+        this.wordsintext = 1;
+        this.wordsintitle = 1;
+        this.termFrequency = 1;
+    }
+
     public WordReferenceVars(
             final byte[] urlHash,
             final int urlLength, // byte-length of complete URL
diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java
index 01ab8e629..ad7f03599 100644
--- a/source/net/yacy/search/query/QueryParams.java
+++ b/source/net/yacy/search/query/QueryParams.java
@@ -323,6 +323,25 @@ public final class QueryParams {
         return this.domType == Searchdom.LOCAL;
     }
 
+    /**
+     * Builds the solr query string from the include and exclude words of this query.
+     * Every word is joined with '+', exclude words are additionally prefixed with '-',
+     * and the whole term is prefixed with the field name "text_t:".
+     *
+     * @return the solr query string, or null if there are no include words
+     */
+    public String solrQuery() {
+        if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
+        final StringBuilder sb = new StringBuilder(80);
+        for (String s: this.query_include_words) {sb.append('+'); sb.append(s);}
+        // guard the exclude words the same way as the include words; they are optional
+        if (this.query_exclude_words != null) {
+            for (String s: this.query_exclude_words) {sb.append("+-"); sb.append(s);}
+        }
+        // at least one include word was appended, so sb starts with '+'; cut that leading separator
+        return "text_t:" + sb.substring(1);
+    }
+
     public static HandleSet hashes2Set(final String query) {
         final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
         if (query != null) {
diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java
index 2bd873fd4..60a75e275 100644
--- a/source/net/yacy/search/query/RWIProcess.java
+++ b/source/net/yacy/search/query/RWIProcess.java
@@ -26,6 +26,7 @@
 
 package net.yacy.search.query;
 
+import java.io.IOException;
 import java.util.Comparator;
 import java.util.ConcurrentModificationException;
 import java.util.HashMap;
@@ -74,6 +75,10 @@ import net.yacy.search.index.Segment;
 import net.yacy.search.ranking.ReferenceOrder;
 import net.yacy.search.snippet.ResultEntry;
 
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+
 import com.hp.hpl.jena.rdf.model.RDFNode;
 import com.hp.hpl.jena.rdf.model.Resource;
 
@@ -193,6 +198,42 @@ public final class RWIProcess extends Thread
 
     @Override
     public void run() {
+
+        // Start a concurrent solr search whose hits are added to this ranking process.
+        // solrQuery() returns null when there are no include words; in that case there is
+        // nothing to ask solr for, so no thread is started and no null query reaches solr.
+        final String solrQuery = this.query.solrQuery();
+        if (solrQuery != null) {
+            Thread solrSearch = new Thread() {
+                @Override
+                public void run() {
+                    Thread.currentThread().setName("SearchEvent.solrSearch");
+                    try {
+                        ReferenceContainer wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
+                        SolrDocumentList sdl = RWIProcess.this.query.getSegment().getSolr().query(solrQuery, 0, 20);
+                        for (SolrDocument d : sdl) {
+                            try {
+                                wr.add(new WordReferenceVars(d));
+                            } catch (SpaceExceededException e) {
+                                // the container cannot take more entries: rank what was collected so far
+                                Log.logWarning("SearchEvent", "solr hit container is full after " + wr.size() + " entries: " + e.getMessage());
+                                break;
+                            }
+                        }
+                        Log.logInfo("SearchEvent", "added " + wr.size() + " hits from solr to ranking process");
+                        RWIProcess.this.add(wr, true, "embedded solr", sdl.size(), 60000);
+                    } catch (SolrException e) {
+                        // best effort: the regular search continues without the solr hits
+                        Log.logWarning("SearchEvent", "solr search failed for query '" + solrQuery + "': " + e.getMessage());
+                    } catch (IOException e) {
+                        // best effort: the regular search continues without the solr hits
+                        Log.logWarning("SearchEvent", "solr search failed for query '" + solrQuery + "': " + e.getMessage());
+                    }
+                }
+            };
+            solrSearch.start();
+        }
+
         // do a search
         oneFeederStarted();
 