From cc39667399399899eaee15734345c863cedefbc4 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 1 Nov 2013 17:24:36 +0100 Subject: [PATCH] Speed enhancements and less CPU usage during Solr searches when using the embedded Solr (the default). This was obtained by circumventing solrj search encapsulation and implementing direct index access methods to Solr. The effect will not only be seen during search, but it also has a strong effect on suggestions (much more) and less CPU power usage during index distribution (which needs many search requests) --- .../solr/connector/AbstractSolrConnector.java | 12 +- .../ConcurrentUpdateSolrConnector.java | 6 +- .../solr/connector/EmbeddedSolrConnector.java | 144 +++++++++++++++++- .../solr/connector/MirrorSolrConnector.java | 40 +++++ .../solr/connector/SolrConnector.java | 2 +- .../solr/instance/EmbeddedInstance.java | 4 + source/net/yacy/search/index/Fulltext.java | 8 +- .../schema/CollectionConfiguration.java | 2 +- 8 files changed, 202 insertions(+), 16 deletions(-) diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index 67b7a321b..e2f6f31d3 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -70,7 +70,7 @@ public abstract class AbstractSolrConnector implements SolrConnector { catchSuccessQuery.setRows(0); catchSuccessQuery.setStart(0); } - private final static int pagesize = 100; + protected final static int pagesize = 100; @Override public boolean existsByQuery(final String query) throws IOException { @@ -83,10 +83,10 @@ public abstract class AbstractSolrConnector implements SolrConnector { } @Override - public Object getFieldById(final String key, final String field) throws IOException { + public String getFieldById(final String key, final String field) throws IOException { 
SolrDocument doc = getDocumentById(key, field); if (doc == null) return null; - return doc.getFieldValue(field); + return doc.getFieldValue(field).toString(); } /** @@ -329,11 +329,11 @@ public abstract class AbstractSolrConnector implements SolrConnector { } @Override - public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException { + public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException { final SolrQuery query = new SolrQuery(); - assert key.length() == 12; + assert id.length() == 12; // construct query - query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + key); + query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id); query.clearSorts(); query.setRows(1); query.setStart(0); diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 47fae1aab..8eff5f315 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -377,11 +377,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { } @Override - public Object getFieldById(String id, String field) throws IOException { + public String getFieldById(String id, String field) throws IOException { if (existIdFromDeleteQueue(id)) return null; SolrInputDocument doc = getFromUpdateQueue(id); - if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field);} - Object val = this.connector.getFieldById(id, field); + if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field).toString();} + String val = this.connector.getFieldById(id, field); if (val != null) updateIdCache(id); return val; } diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java 
b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 308525964..6348c79a3 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -22,12 +22,20 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.federate.solr.instance.EmbeddedInstance; import net.yacy.cora.federate.solr.instance.SolrInstance; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.search.schema.CollectionSchema; +import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; +import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrException; @@ -40,13 +48,20 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; -import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.RefCounted; public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector { + static Set SOLR_ID_FIELDS = new HashSet(); + static { + SOLR_ID_FIELDS.add(CollectionSchema.id.getSolrFieldName()); + } + public static final String SELECT = "/select"; public static final String CONTEXT = "/solr"; @@ -128,7 +143,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo 
NamedList responseHeader = new SimpleOrderedMap(); responseHeader.add("params", req.getOriginalParams().toNamedList()); rsp.add("responseHeader", responseHeader); - SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); + //SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // send request to solr and create a result this.requestHandler.handleRequest(req, rsp); @@ -144,6 +159,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo return rsp; } + /** + * the usage of getResponseByParams is discouraged for the embedded Solr connector. Please use request(SolrParams) instead. + * Reason: Solr makes a very complex folding/unfolding including data compression for SolrQueryResponses. + */ @Override public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException { if (this.server == null) throw new IOException("server disconnected"); @@ -164,4 +183,125 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo } } + private class DocListSearcher { + public SolrQueryRequest request; + public DocList response; + + public DocListSearcher(final String querystring, final int offset, final int count, final String ... 
fields) { + // construct query + final SolrQuery params = new SolrQuery(); + params.setQuery(querystring); + params.setRows(count); + params.setStart(offset); + params.setFacet(false); + params.clearSorts(); + if (fields.length > 0) params.setFields(fields); + params.setIncludeScore(false); + + // query the server + this.request = request(params); + SolrQueryResponse rsp = query(request); + this.response = ((ResultContext) rsp.getValues().get("response")).docs; + } + public void close() { + if (this.request != null) this.request.close(); + this.request = null; + this.response = null; + } + } + + @Override + public long getCountByQuery(String querystring) { + DocListSearcher docListSearcher = new DocListSearcher(querystring, 0, 0, CollectionSchema.id.getSolrFieldName()); + int numFound = docListSearcher.response.matches(); + docListSearcher.close(); + return numFound; + } + + @Override + public boolean existsById(String id) { + return getCountByQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id) > 0; + } + + @Override + public Set existsByIds(Collection ids) { + if (ids == null || ids.size() == 0) return new HashSet(); + if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? 
(Set) ids : new HashSet(); + StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)" + for (String id: ids) { + sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR "); + } + if (sb.length() > 0) sb.setLength(sb.length() - 4); // cut off the last 'or' + DocListSearcher docListSearcher = new DocListSearcher(sb.toString(), 0, ids.size(), CollectionSchema.id.getSolrFieldName()); + //int numFound = docListSearcher.response.matches(); + int responseCount = docListSearcher.response.size(); + SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); + DocIterator iterator = docListSearcher.response.iterator(); + HashSet idsr = new HashSet(); + try { + for (int i = 0; i < responseCount; i++) { + Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS); + idsr.add(doc.get(CollectionSchema.id.getSolrFieldName())); + } + } catch (IOException e) { + } finally { + docListSearcher.close(); + } + // construct a new id list from that + return idsr; + } + + @Override + public String getFieldById(final String id, final String field) throws IOException { + DocListSearcher docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName()); + int numFound = docListSearcher.response.matches(); + if (numFound == 0) return null; + Set solrFields = new HashSet(); + solrFields.add(field); + try { + Document doc = docListSearcher.request.getSearcher().doc(docListSearcher.response.iterator().nextDoc(), solrFields); + return doc.get(field); + } catch (IOException e) { + e.printStackTrace(); + } finally { + docListSearcher.close(); + } + return null; + } + + @Override + public BlockingQueue concurrentIDsByQuery(final String querystring, final int offset, final int maxcount, final long maxtime) { + final BlockingQueue queue = new LinkedBlockingQueue(); + final long endtime = 
maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity! + final Thread t = new Thread() { + @Override + public void run() { + int o = offset; + while (System.currentTimeMillis() < endtime) { + try { + DocListSearcher docListSearcher = new DocListSearcher(querystring, o, pagesize, CollectionSchema.id.getSolrFieldName()); + int responseCount = docListSearcher.response.size(); + SolrIndexSearcher searcher = docListSearcher.request.getSearcher(); + DocIterator iterator = docListSearcher.response.iterator(); + try { + for (int i = 0; i < responseCount; i++) { + Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS); + try {queue.put(doc.get(CollectionSchema.id.getSolrFieldName()));} catch (final InterruptedException e) {break;} + } + } catch (IOException e) { + } finally { + docListSearcher.close(); + } + if (responseCount < pagesize) break; + o += pagesize; + } catch (final SolrException e) { + break; + } + } + try {queue.put(AbstractSolrConnector.POISON_ID);} catch (final InterruptedException e1) {} + } + }; + t.start(); + return queue; + } } diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index 76f238c47..3f7a1453c 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -23,7 +23,10 @@ package net.yacy.cora.federate.solr.connector; import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicLong; import net.yacy.cora.sorting.ReversibleScoreMap; @@ -338,4 +341,41 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return s; } + @Override + public boolean existsById(String id) throws IOException 
{ + return (this.solr0 != null && this.solr0.existsById(id)) || (this.solr1 != null && this.solr1.existsById(id)); + } + + @Override + public Set existsByIds(Collection ids) throws IOException { + if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids); + if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids); + Set s = new HashSet(); + s.addAll(this.solr0.existsByIds(ids)); + s.addAll(this.solr1.existsByIds(ids)); + return s; + } + + @Override + public String getFieldById(String key, String field) throws IOException { + if (this.solr0 != null && this.solr1 == null) return this.solr0.getFieldById(key, field); + if (this.solr0 == null && this.solr1 != null) return this.solr1.getFieldById(key, field); + String value = this.solr0.getFieldById(key, field); + if (value != null) return value; + return this.solr1.getFieldById(key, field); + } + + /* + @Override + public BlockingQueue concurrentDocumentsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, String... 
fields) { + return null; + } + */ + @Override + public BlockingQueue concurrentIDsByQuery(String querystring, int offset, int maxcount, long maxtime) { + if (this.solr0 != null && this.solr1 == null) return this.solr0.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + if (this.solr0 == null && this.solr1 != null) return this.solr1.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + return super.concurrentIDsByQuery(querystring, offset, maxcount, maxtime); + } + } diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 5d58d1c9e..e7a3dd957 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -139,7 +139,7 @@ public interface SolrConnector extends Iterable /* Iterable of document * @return one result or null if no result exists * @throws IOException */ - public Object getFieldById(final String key, final String field) throws IOException; + public String getFieldById(final String key, final String field) throws IOException; /** * get a document from solr by given key for the id-field diff --git a/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java b/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java index 897a4c640..b50495801 100644 --- a/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java +++ b/source/net/yacy/cora/federate/solr/instance/EmbeddedInstance.java @@ -166,6 +166,10 @@ public class EmbeddedInstance implements SolrInstance { return this.containerPath; } + public CoreContainer getCoreContainer() { + return this.coreContainer; + } + @Override public String getDefaultCoreName() { return this.defaultCoreName; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 40f6db2c4..46389247c 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ 
b/source/net/yacy/search/index/Fulltext.java @@ -310,7 +310,9 @@ public final class Fulltext { if (urlHash == null) return null; Date x; try { - x = (Date) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName()); + String d = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName()); + if (d == null) return null; + x = new Date(Long.parseLong(d)); } catch (final IOException e) { return null; } @@ -322,7 +324,7 @@ public final class Fulltext { String x; try { - x = (String) this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName()); + x = this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName()); } catch (final IOException e) { return null; } @@ -642,7 +644,7 @@ public final class Fulltext { public String failReason(final String urlHash) throws IOException { if (urlHash == null) return null; - String reason = (String) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName()); + String reason = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.failreason_s.getSolrFieldName()); if (reason == null) return null; return reason.length() == 0 ? 
null : reason; } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index e6a8fecde..adda277b2 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -1092,7 +1092,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri this.crt = new TreeMap(Base64Order.enhancedCoder); try { // select all documents for each host - BlockingQueue ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000); + BlockingQueue ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, 0, 1000000, 600000); String id; while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) { this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}