diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 67b7a321b..e2f6f31d3 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -70,7 +70,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { catchSuccessQuery.setRows(0); catchSuccessQuery.setStart(0); } - private final static int pagesize = 100; + protected final static int pagesize = 100; @Override public boolean existsByQuery(final String query) throws IOException { @@ -83,10 +83,10 @@ public abstract class AbstractSolrConnector implements SolrConnector { } @Override - public Object getFieldById(final String key, final String field) throws IOException { + public String getFieldById(final String key, final String field) throws IOException { SolrDocument doc = getDocumentById(key, field); if (doc == null) return null; - return doc.getFieldValue(field); + return doc.getFieldValue(field).toString(); } /** @@ -329,11 +329,11 @@ public abstract class AbstractSolrConnector implements SolrConnector { } @Override - public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException { + public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException { final SolrQuery query = new SolrQuery(); - assert key.length() == 12; + assert id.length() == 12; // construct query - query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + key); + query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id); query.clearSorts(); query.setRows(1); query.setStart(0); diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 47fae1aab..8eff5f315 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -377,11 +377,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { } @Override - public Object getFieldById(String id, String field) throws IOException { + public String getFieldById(String id, String field) throws IOException { if (existIdFromDeleteQueue(id)) return null; SolrInputDocument doc = getFromUpdateQueue(id); - if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field);} - Object val = this.connector.getFieldById(id, field); + if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field).toString();} + String val = this.connector.getFieldById(id, field); if (val != null) updateIdCache(id); return val; } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 308525964..6348c79a3 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -22,12 +22,20 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.federate.solr.instance.EmbeddedInstance; import net.yacy.cora.federate.solr.instance.SolrInstance; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.search.schema.CollectionSchema; +import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; +import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrException; @@ -40,13 +48,20 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; -import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.RefCounted; public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector { + static Set SOLR_ID_FIELDS = new HashSet(); + static { + SOLR_ID_FIELDS.add(CollectionSchema.id.getSolrFieldName()); + } + public static final String SELECT = "/select"; public static final String CONTEXT = "/solr"; @@ -128,7 +143,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo NamedList responseHeader = new SimpleOrderedMap(); responseHeader.add("params", req.getOriginalParams().toNamedList()); rsp.add("responseHeader", responseHeader); - SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); + //SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // send request to solr and create a result this.requestHandler.handleRequest(req, rsp); @@ -144,6 +159,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo return rsp; } + /** + * the usage of getResponseByParams is disencouraged for the embedded Solr connector. Please use request(SolrParams) instead. + * Reason: Solr makes a very complex folding/unfolding including data compression for SolrQueryResponses. + */ @Override public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException { if (this.server == null) throw new IOException("server disconnected"); @@ -164,4 +183,125 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo } } + private class DocListSearcher { + public SolrQueryRequest request; + public DocList response; + + public DocListSearcher(final String querystring, final int offset, final int count, final String ... fields) { + // construct query + final SolrQuery params = new SolrQuery(); + params.setQuery(querystring); + params.setRows(count); + params.setStart(offset); + params.setFacet(false); + params.clearSorts(); + if (fields.length > 0) params.setFields(fields); + params.setIncludeScore(false); + + // query the server + this.request = request(params); + SolrQueryResponse rsp = query(request); + this.response = ((ResultContext) rsp.getValues().get("response")).docs; + } + public void close() { + if (this.request != null) this.request.close(); + this.request = null; + this.response = null; + } + } + + @Override + public long getCountByQuery(String querystring) { + DocListSearcher docListSearcher = new DocListSearcher(querystring, 0, 0, CollectionSchema.id.getSolrFieldName()); + int numFound = docListSearcher.response.matches(); + docListSearcher.close(); + return numFound; + } + + @Override + public boolean existsById(String id) { + return getCountByQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id) > 0; + } + + @Override + public Set existsByIds(Collection ids) { + if (ids == null || ids.size() == 0) return new HashSet(); + if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set) ids : new HashSet(); + StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)" + for (String id: ids) { + sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR "); + } + if (sb.length() > 0) sb.setLength(sb.length() - 4); // cut off the last 'or' + DocListSearcher docListSearcher = new DocListSearcher(sb.toString(), 0, ids.size(), CollectionSchema.id.getSolrFieldName()); + //int numFound = docListSearcher.response.matches(); + int responseCount = docListSearcher.response.size(); + SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); + DocIterator iterator = docListSearcher.response.iterator(); + HashSet idsr = new HashSet(); + try { + for (int i = 0; i < responseCount; i++) { + Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS); + idsr.add(doc.get(CollectionSchema.id.getSolrFieldName())); + } + } catch (IOException e) { + } finally { + docListSearcher.close(); + } + // construct a new id list from that + return idsr; + } + + @Override + public String getFieldById(final String id, final String field) throws IOException { + DocListSearcher docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName()); + int numFound = docListSearcher.response.matches(); + if (numFound == 0) return null; + Set solrFields = new HashSet(); + solrFields.add(field); + try { + Document doc = docListSearcher.request.getSearcher().doc(docListSearcher.response.iterator().nextDoc(), solrFields); + return doc.get(field); + } catch (IOException e) { + e.printStackTrace(); + } finally { + docListSearcher.close(); + } + return null; + } + + @Override + public BlockingQueue concurrentIDsByQuery(final String querystring, final int offset, final int maxcount, final long maxtime) { + final BlockingQueue queue = new LinkedBlockingQueue(); + final long endtime = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity! + final Thread t = new Thread() { + @Override + public void run() { + int o = offset; + while (System.currentTimeMillis() < endtime) { + try { + DocListSearcher docListSearcher = new DocListSearcher(querystring, o, pagesize, CollectionSchema.id.getSolrFieldName()); + int responseCount = docListSearcher.response.size(); + SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); + DocIterator iterator = docListSearcher.response.iterator(); + try { + for (int i = 0; i < responseCount; i++) { + Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS); + try {queue.put(doc.get(CollectionSchema.id.getSolrFieldName()));} catch (final InterruptedException e) {break;} + } + } catch (IOException e) { + } finally { + docListSearcher.close(); + } + if (responseCount < pagesize) break; + o += pagesize; + } catch (final SolrException e) { + break; + } + } + try {queue.put(AbstractSolrConnector.POISON_ID);} catch (final InterruptedException e1) {} + } + }; + t.start(); + return queue; + } } diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index 76f238c47..3f7a1453c 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -23,7 +23,10 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicLong; import net.yacy.cora.sorting.ReversibleScoreMap; @@ -338,4 +341,41 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return s; } + @Override + public boolean existsById(String id) throws IOException { + return (this.solr0 != null && this.solr0.existsById(id)) || (this.solr1 != null && this.solr1.existsById(id)); + } + + @Override + public Set existsByIds(Collection ids) throws IOException { + if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids); + if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids); + Set s = new HashSet(); + s.addAll(this.solr0.existsByIds(ids)); + s.addAll(this.solr1.existsByIds(ids)); + return s; + } + + @Override + public String getFieldById(String key, String field) throws IOException { + if (this.solr0 != null && this.solr1 == null) return this.solr0.getFieldById(key, field); + if (this.solr0 == null && this.solr1 != null) return this.solr1.getFieldById(key, field); + String value = this.solr0.getFieldById(key, field); + if (value != null) return value; + return this.solr1.getFieldById(key, field); + } + + /* + @Override + public BlockingQueue concurrentDocumentsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, String... fields) { + return null; + } + */ + @Override + public BlockingQueue concurrentIDsByQuery(String querystring, int offset, int maxcount, long maxtime) { + if (this.solr0 != null && this.solr1 == null) return this.solr0.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + if (this.solr0 == null && this.solr1 != null) return this.solr1.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + return super.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + } + } diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 5d58d1c9e..e7a3dd957 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -139,7 +139,7 @@ public interface SolrConnector extends Iterable /* Iterable of document * @return one result or null if no result exists * @throws IOException */ - public Object getFieldById(final String key, final String field) throws IOException; + public String getFieldById(final String key, final String field) throws IOException; /** * get a document from solr by given key for the id-field diff --git a/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java b/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java index 897a4c640..b50495801 100644 --- a/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java +++ b/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java @@ -166,6 +166,10 @@ public class EmbeddedInstance implements SolrInstance { return this.containerPath; } + public CoreContainer getCoreContainer() { + return this.coreContainer; + } + @Override public String getDefaultCoreName() { return this.defaultCoreName; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 40f6db2c4..46389247c 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -310,7 +310,9 @@ public final class Fulltext { if (urlHash == null) return null; Date x; try { - x = (Date) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName()); + String d = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName()); + if (d == null) return null; + x = new Date(Long.parseLong(d)); } catch (final IOException e) { return null; } @@ -322,7 +324,7 @@ public final class Fulltext { String x; try { - x = (String) this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName()); + x = this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName()); } catch (final IOException e) { return null; } @@ -642,7 +644,7 @@ public final class Fulltext { public String failReason(final String urlHash) throws IOException { if (urlHash == null) return null; - String reason = (String) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName()); + String reason = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName()); if (reason == null) return null; return reason.length() == 0 ? null : reason; } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index e6a8fecde..adda277b2 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -1092,7 +1092,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri this.crt = new TreeMap(Base64Order.enhancedCoder); try { // select all documents for each host - BlockingQueue ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000); + BlockingQueue ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, 0, 1000000, 600000); String id; while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) { this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}