Speed enhancements and less CPU usage during Solr searches when using

the embedded Solr (the default). This was achieved by circumventing the
solrj search encapsulation and by implementing direct index access
methods to Solr.
The effect is not only visible during search: it also has a strong
(even larger) effect on suggestions, and it reduces CPU usage during
index distribution (which needs many search requests).
pull/1/head
Michael Peter Christen 12 years ago
parent 434e13b46d
commit cc39667399

@ -70,7 +70,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
catchSuccessQuery.setRows(0);
catchSuccessQuery.setStart(0);
}
private final static int pagesize = 100;
protected final static int pagesize = 100;
@Override
public boolean existsByQuery(final String query) throws IOException {
@ -83,10 +83,10 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
@Override
public Object getFieldById(final String key, final String field) throws IOException {
public String getFieldById(final String key, final String field) throws IOException {
SolrDocument doc = getDocumentById(key, field);
if (doc == null) return null;
return doc.getFieldValue(field);
return doc.getFieldValue(field).toString();
}
/**
@ -329,11 +329,11 @@ public abstract class AbstractSolrConnector implements SolrConnector {
}
@Override
public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException {
public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException {
final SolrQuery query = new SolrQuery();
assert key.length() == 12;
assert id.length() == 12;
// construct query
query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + key);
query.setQuery("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id);
query.clearSorts();
query.setRows(1);
query.setStart(0);

@ -377,11 +377,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
@Override
public Object getFieldById(String id, String field) throws IOException {
public String getFieldById(String id, String field) throws IOException {
if (existIdFromDeleteQueue(id)) return null;
SolrInputDocument doc = getFromUpdateQueue(id);
if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field);}
Object val = this.connector.getFieldById(id, field);
if (doc != null) {cacheSuccessSign(); return doc.getFieldValue(field).toString();}
String val = this.connector.getFieldById(id, field);
if (val != null) updateIdCache(id);
return val;
}

@ -22,12 +22,20 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.federate.solr.instance.EmbeddedInstance;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
@ -40,13 +48,20 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector {
/** Field selection that contains only the id field; passed to the index searcher when just the document id is loaded. */
static Set<String> SOLR_ID_FIELDS = new HashSet<String>();
static {
    final String idFieldName = CollectionSchema.id.getSolrFieldName();
    SOLR_ID_FIELDS.add(idFieldName);
}
public static final String SELECT = "/select";
public static final String CONTEXT = "/solr";
@ -128,7 +143,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
NamedList<Object> responseHeader = new SimpleOrderedMap<Object>();
responseHeader.add("params", req.getOriginalParams().toNamedList());
rsp.add("responseHeader", responseHeader);
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
//SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
// send request to solr and create a result
this.requestHandler.handleRequest(req, rsp);
@ -144,6 +159,10 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
return rsp;
}
/**
* The usage of getResponseByParams is discouraged for the embedded Solr connector. Please use request(SolrParams) instead.
* Reason: Solr performs a very complex folding/unfolding, including data compression, for SolrQueryResponses.
*/
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected");
@ -164,4 +183,125 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
}
/**
 * Runs a query directly against the embedded Solr core and exposes the raw
 * document id list together with the request that produced it.
 * The request stays open after construction so that callers can obtain the
 * searcher from it; every instance must therefore be released with {@link #close()}.
 */
private class DocListSearcher {
    /** the open request; null after close() */
    public SolrQueryRequest request;
    /** the internal document list of the result; null after close() */
    public DocList response;

    /**
     * @param querystring the Solr query
     * @param offset index of the first result to return
     * @param count maximum number of results to return
     * @param fields the fields to select; if empty, no field restriction is set
     */
    public DocListSearcher(final String querystring, final int offset, final int count, final String ... fields) {
        // construct query
        final SolrQuery params = new SolrQuery();
        params.setQuery(querystring);
        params.setRows(count);
        params.setStart(offset);
        params.setFacet(false);
        params.clearSorts();
        if (fields.length > 0) params.setFields(fields);
        params.setIncludeScore(false);

        // query the server
        this.request = request(params);
        try {
            final SolrQueryResponse rsp = query(this.request);
            this.response = ((ResultContext) rsp.getValues().get("response")).docs;
        } catch (final RuntimeException e) {
            // the caller never receives this instance and cannot close it: release the request here
            close();
            throw e;
        }
    }

    /** Release the request and drop the result; safe to call more than once. */
    public void close() {
        if (this.request != null) this.request.close();
        this.request = null;
        this.response = null;
    }
}
/**
 * Count the documents that match a query without loading any of them.
 *
 * @param querystring the Solr query
 * @return the number of matching documents
 */
@Override
public long getCountByQuery(String querystring) {
    // rows = 0: only the match count is needed
    final DocListSearcher docListSearcher = new DocListSearcher(querystring, 0, 0, CollectionSchema.id.getSolrFieldName());
    try {
        return docListSearcher.response.matches();
    } finally {
        // release the request even if reading the result fails
        docListSearcher.close();
    }
}
/**
 * Check whether a document with the given id is in the index.
 *
 * @param id the document id
 * @return true if at least one document matches the raw id query
 */
@Override
public boolean existsById(String id) {
    final String rawIdQuery = "{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id;
    final long hits = getCountByQuery(rawIdQuery);
    return hits > 0;
}
/**
 * Find out which of the given ids exist in the index.
 *
 * @param ids the document ids to check; may be null or empty
 * @return the subset of ids found in the index. For a one-element Set the given
 *         set itself is returned when its id exists. If reading the index fails,
 *         the ids collected up to that point are returned.
 */
@Override
public Set<String> existsByIds(Collection<String> ids) {
    if (ids == null || ids.size() == 0) return new HashSet<String>();
    if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set<String>) ids : new HashSet<String>();
    final String idField = CollectionSchema.id.getSolrFieldName();
    final StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
    for (final String id: ids) {
        sb.append("({!raw f=").append(idField).append('}').append(id).append(") OR ");
    }
    if (sb.length() > 0) sb.setLength(sb.length() - 4); // cut off the last ' OR '
    final HashSet<String> idsr = new HashSet<String>();
    final DocListSearcher docListSearcher = new DocListSearcher(sb.toString(), 0, ids.size(), idField);
    try {
        // everything that touches the open request happens inside the try so that it is always closed
        final int responseCount = docListSearcher.response.size();
        final SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
        final DocIterator iterator = docListSearcher.response.iterator();
        for (int i = 0; i < responseCount; i++) {
            final Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS);
            idsr.add(doc.get(idField));
        }
    } catch (final IOException e) {
        // best effort: keep what was collected so far, but do not hide the failure
        ConcurrentLog.logException(e);
    } finally {
        docListSearcher.close();
    }
    return idsr;
}
/**
 * Read a single stored field of the document with the given id directly from the index.
 *
 * @param id the document id
 * @param field the name of the field to read
 * @return the string form of the stored field value as delivered by the index document,
 *         or null if no document matches, the field is not present, or the index read fails
 * @throws IOException declared by the connector interface; read failures are logged and reported as null
 */
@Override
public String getFieldById(final String id, final String field) throws IOException {
    final DocListSearcher docListSearcher = new DocListSearcher("{!raw f=" + CollectionSchema.id.getSolrFieldName() + "}" + id, 0, 1, CollectionSchema.id.getSolrFieldName());
    try {
        // the no-match exit is inside the try so that the request is closed on this path as well
        if (docListSearcher.response.matches() == 0) return null;
        final Set<String> solrFields = new HashSet<String>();
        solrFields.add(field);
        final Document doc = docListSearcher.request.getSearcher().doc(docListSearcher.response.iterator().nextDoc(), solrFields);
        return doc.get(field);
    } catch (final IOException e) {
        ConcurrentLog.logException(e);
        return null;
    } finally {
        docListSearcher.close();
    }
}
/**
 * Stream the ids of all documents matching a query into a queue, page by page,
 * from a background thread. The end of the stream is always marked with
 * AbstractSolrConnector.POISON_ID, whatever the reason for termination.
 *
 * @param querystring the Solr query
 * @param offset index of the first result to deliver
 * @param maxcount maximum number of ids to deliver
 * @param maxtime maximum time in milliseconds before no further page is requested;
 *        Long.MAX_VALUE means no time limit
 * @return the queue that receives the ids followed by the poison id
 */
@Override
public BlockingQueue<String> concurrentIDsByQuery(final String querystring, final int offset, final int maxcount, final long maxtime) {
    final BlockingQueue<String> queue = new LinkedBlockingQueue<String>();
    final long endtime = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; // we know infinity!
    final String idField = CollectionSchema.id.getSolrFieldName();
    final Thread t = new Thread() {
        @Override
        public void run() {
            try {
                int o = offset;
                int delivered = 0;
                while (delivered < maxcount && System.currentTimeMillis() < endtime) {
                    // never request more than the caller still wants
                    final int wanted = Math.min(pagesize, maxcount - delivered);
                    final DocListSearcher docListSearcher = new DocListSearcher(querystring, o, wanted, idField);
                    int responseCount;
                    try {
                        responseCount = docListSearcher.response.size();
                        final SolrIndexSearcher searcher = docListSearcher.request.getSearcher();
                        final DocIterator iterator = docListSearcher.response.iterator();
                        for (int i = 0; i < responseCount; i++) {
                            final Document doc = searcher.doc(iterator.nextDoc(), SOLR_ID_FIELDS);
                            queue.put(doc.get(idField));
                            delivered++;
                        }
                    } catch (final IOException e) {
                        // best effort: the rest of this page is lost, paging continues
                        ConcurrentLog.logException(e);
                        responseCount = wanted;
                    } finally {
                        docListSearcher.close();
                    }
                    if (responseCount < wanted) break; // last page reached
                    o += wanted;
                }
            } catch (final SolrException e) {
                // the query failed; end the stream (the poison id is still delivered below)
            } catch (final InterruptedException e) {
                // stop producing and keep the interrupt status visible
                Thread.currentThread().interrupt();
            } finally {
                // offer() does not block on this unbounded queue and is not affected by the
                // interrupt status, so consumers are always released
                queue.offer(AbstractSolrConnector.POISON_ID);
            }
        }
    };
    t.start();
    return queue;
}
}

@ -23,7 +23,10 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.sorting.ReversibleScoreMap;
@ -338,4 +341,41 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return s;
}
/**
 * Check whether one of the mirrored connectors knows the given id.
 * solr0 is asked first; solr1 is only asked if solr0 is absent or does not have the id.
 *
 * @param id the document id
 * @return true if a connected connector reports the id as existing
 * @throws IOException if a connector fails
 */
@Override
public boolean existsById(String id) throws IOException {
    if (this.solr0 != null && this.solr0.existsById(id)) {
        return true;
    }
    return this.solr1 != null && this.solr1.existsById(id);
}
/**
 * Find out which of the given ids exist in any of the mirrored connectors.
 *
 * @param ids the document ids to check
 * @return the union of the ids found by the connected connectors;
 *         an empty set if no connector is connected
 * @throws IOException if a connector fails
 */
@Override
public Set<String> existsByIds(Collection<String> ids) throws IOException {
    // no connector at all: nothing can exist (previously a NullPointerException)
    if (this.solr0 == null && this.solr1 == null) return new HashSet<String>();
    if (this.solr1 == null) return this.solr0.existsByIds(ids);
    if (this.solr0 == null) return this.solr1.existsByIds(ids);
    final Set<String> s = new HashSet<String>();
    s.addAll(this.solr0.existsByIds(ids));
    s.addAll(this.solr1.existsByIds(ids));
    return s;
}
/**
 * Get a single field value of the document with the given id.
 * solr0 is asked first; solr1 is only asked if solr0 is absent or has no value.
 *
 * @param key the document id
 * @param field the name of the field to read
 * @return the field value, or null if no connected connector delivers one
 * @throws IOException if a connector fails
 */
@Override
public String getFieldById(String key, String field) throws IOException {
    // no connector at all: no value (previously a NullPointerException)
    if (this.solr0 == null && this.solr1 == null) return null;
    if (this.solr1 == null) return this.solr0.getFieldById(key, field);
    if (this.solr0 == null) return this.solr1.getFieldById(key, field);
    final String value = this.solr0.getFieldById(key, field);
    if (value != null) return value;
    return this.solr1.getFieldById(key, field);
}
/*
@Override
public BlockingQueue<SolrDocument> concurrentDocumentsByQuery(String querystring, int offset, int maxcount, long maxtime, int buffersize, String... fields) {
return null;
}
*/
/**
 * Stream the ids of the documents matching a query.
 * If exactly one of the two connectors is connected, the call is delegated to it;
 * in every other case the implementation of the super class is used.
 *
 * @param querystring the Solr query
 * @param offset index of the first result
 * @param maxcount maximum number of ids, passed through unchanged
 * @param maxtime time limit in milliseconds, passed through unchanged
 * @return the queue that receives the ids
 */
@Override
public BlockingQueue<String> concurrentIDsByQuery(String querystring, int offset, int maxcount, long maxtime) {
    final boolean has0 = this.solr0 != null;
    final boolean has1 = this.solr1 != null;
    if (has0 && !has1) {
        return this.solr0.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
    }
    if (has1 && !has0) {
        return this.solr1.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
    }
    return super.concurrentIDsByQuery(querystring, offset, maxcount, maxtime);
}
}

@ -139,7 +139,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @return one result or null if no result exists
* @throws IOException
*/
public Object getFieldById(final String key, final String field) throws IOException;
public String getFieldById(final String key, final String field) throws IOException;
/**
* get a document from solr by given key for the id-field

@ -166,6 +166,10 @@ public class EmbeddedInstance implements SolrInstance {
return this.containerPath;
}
/**
 * @return the core container held by this instance
 */
public CoreContainer getCoreContainer() {
    final CoreContainer container = this.coreContainer;
    return container;
}
@Override
public String getDefaultCoreName() {
return this.defaultCoreName;

@ -310,7 +310,9 @@ public final class Fulltext {
if (urlHash == null) return null;
Date x;
try {
x = (Date) this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName());
String d = this.getDefaultConnector().getFieldById(urlHash, CollectionSchema.load_date_dt.getSolrFieldName());
if (d == null) return null;
x = new Date(Long.parseLong(d));
} catch (final IOException e) {
return null;
}
@ -322,7 +324,7 @@ public final class Fulltext {
String x;
try {
x = (String) this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName());
x = this.getDefaultConnector().getFieldById(ASCII.String(urlHash), CollectionSchema.sku.getSolrFieldName());
} catch (final IOException e) {
return null;
}
@ -642,7 +644,7 @@ public final class Fulltext {
/**
 * Look up the fail reason stored for a url hash.
 *
 * @param urlHash the url hash of the document; may be null
 * @return the stored fail reason, or null if the hash is null or no non-empty reason is stored
 * @throws IOException if the connector fails
 */
public String failReason(final String urlHash) throws IOException {
    if (urlHash == null) {
        return null;
    }
    final String fieldName = CollectionSchema.failreason_s.getSolrFieldName();
    final String reason = this.getDefaultConnector().getFieldById(urlHash, fieldName);
    if (reason == null || reason.length() == 0) {
        return null;
    }
    return reason;
}

@ -1092,7 +1092,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
this.crt = new TreeMap<byte[], double[]>(Base64Order.enhancedCoder);
try {
// select all documents for each host
BlockingQueue<String> ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000);
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, 0, 1000000, 600000);
String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}

Loading…
Cancel
Save