- renamed DoubleSolrConnector to MirrorSolrConnector and added a hit/miss/document cache to the MirrorSolrConnector
- more abstraction towards SolrDocument in the SolrConnector interface
- bugfixes in the Solr field reader
pull/1/head
Michael Peter Christen 13 years ago
parent 94a334f128
commit e5ef840f40

@ -48,10 +48,10 @@ public class AbstractSolrConnector implements SolrConnector {
catchallQuery.setRows(1);
catchallQuery.setStart(0);
}
protected SolrServer server;
protected int commitWithinMs; // max time (in ms) before a commit will happen
protected AbstractSolrConnector() {
this.server = null;
this.commitWithinMs = 180000;
@ -169,7 +169,7 @@ public class AbstractSolrConnector implements SolrConnector {
}
@Override
public void add(final SolrDoc solrdoc) throws IOException, SolrException {
public void add(final SolrInputDocument solrdoc) throws IOException, SolrException {
try {
this.server.add(solrdoc, this.commitWithinMs);
//this.server.commit();
@ -180,9 +180,9 @@ public class AbstractSolrConnector implements SolrConnector {
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
ArrayList<SolrInputDocument> l = new ArrayList<SolrInputDocument>();
for (SolrDoc d: solrdocs) l.add(d);
for (SolrInputDocument d: solrdocs) l.add(d);
try {
this.server.add(l, this.commitWithinMs);
//this.server.commit();

@ -24,19 +24,42 @@ import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.search.index.YaCySchema;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
/**
* Implementation of a mirrored solr connector.
* Two Solr servers can be attached to serve as storage and search locations.
* When doing a retrieval only the first Solr is requested, if it does not answer or has no result, the second is used.
* When data is stored or deleted this applies to both attached solr.
* It is also possible to attach only one of the solr instances.
* Because it is not possible to set a cache in front of this class (the single connect methods would need to be passed through the cache class),
* this class also contains an object and hit/miss cache.
*/
public class MirrorSolrConnector implements SolrConnector {
public class DoubleSolrConnector implements SolrConnector {
private final static Object EXIST = new Object();
private SolrConnector solr0;
private SolrConnector solr1;
private final ARC<String, Object> hitCache, missCache;
private final ARC<String, SolrDocument> documentCache;
public DoubleSolrConnector() {
public MirrorSolrConnector(int hitCacheMax, int missCacheMax, int docCacheMax) {
    // all three caches share the same second constructor argument:
    // twice the number of processors available to the JVM
    final int segments = 2 * Runtime.getRuntime().availableProcessors();
    this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, segments);
    this.missCache = new ConcurrentARC<String, Object>(missCacheMax, segments);
    this.hitCache = new ConcurrentARC<String, Object>(hitCacheMax, segments);
    // no backend is attached at construction time
    this.solr1 = null;
    this.solr0 = null;
}
public boolean isConnected0() {
@ -75,6 +98,12 @@ public class DoubleSolrConnector implements SolrConnector {
this.solr1 = null;
}
/**
 * Empties the hit cache, the miss cache and the document cache.
 * The attached Solr connectors are not touched.
 */
public void clearCache() {
    // existence markers
    this.hitCache.clear();
    this.missCache.clear();
    // cached document bodies
    this.documentCache.clear();
}
@Override
public int getCommitWithinMs() {
if (this.solr0 != null) this.solr0.getCommitWithinMs();
@ -104,6 +133,9 @@ public class DoubleSolrConnector implements SolrConnector {
*/
@Override
public void clear() throws IOException {
    // forget everything the caches know before the backends are emptied
    this.hitCache.clear();
    this.missCache.clear();
    this.documentCache.clear();
    // forward the clear to every attached backend
    if (this.solr0 != null) {
        this.solr0.clear();
    }
    if (this.solr1 != null) {
        this.solr1.clear();
    }
}
@ -115,6 +147,9 @@ public class DoubleSolrConnector implements SolrConnector {
*/
@Override
public void delete(final String id) throws IOException {
    // cache bookkeeping: the id stops being a known hit, is recorded as a miss,
    // and its cached document is dropped
    this.hitCache.remove(id);
    this.missCache.put(id, EXIST);
    this.documentCache.remove(id);
    // forward the deletion to every attached backend
    if (this.solr0 != null) {
        this.solr0.delete(id);
    }
    if (this.solr1 != null) {
        this.solr1.delete(id);
    }
}
@ -126,6 +161,11 @@ public class DoubleSolrConnector implements SolrConnector {
*/
@Override
public void delete(final List<String> ids) throws IOException {
    // per id: remove the hit marker, record a miss marker, drop the cached document
    for (final String key : ids) {
        this.hitCache.remove(key);
        this.missCache.put(key, EXIST);
        this.documentCache.remove(key);
    }
    // hand the complete id list to every attached backend
    if (this.solr0 != null) {
        this.solr0.delete(ids);
    }
    if (this.solr1 != null) {
        this.solr1.delete(ids);
    }
}
@ -138,25 +178,46 @@ public class DoubleSolrConnector implements SolrConnector {
*/
@Override
public boolean exists(final String id) throws IOException {
// answer from the caches first: a hit marker or a cached document proves existence
if (this.hitCache.containsKey(id) || this.documentCache.containsKey(id)) return true;
// a miss marker was written by an earlier failed lookup or by a delete
if (this.missCache.containsKey(id)) return false;
if (this.solr0 != null) {
if (this.solr0.exists(id)) return true;
if (this.solr0.exists(id)) {
// remember the positive answer so the next call is served from the hit cache
this.hitCache.put(id, EXIST);
return true;
}
}
if (this.solr1 != null) {
if (this.solr1.exists(id)) return true;
if (this.solr1.exists(id)) {
// remember the positive answer so the next call is served from the hit cache
this.hitCache.put(id, EXIST);
return true;
}
}
// no attached backend knows the id: remember the negative answer
this.missCache.put(id, EXIST);
return false;
}
/**
 * Looks up a document by id, consulting the caches before the attached backends.
 * solr0 is asked first and solr1 only if solr0 is not attached or returned null.
 * A document found in a backend is stored in the document cache and marked in the hit cache;
 * an id found nowhere is marked in the miss cache.
 *
 * @param id the document id
 * @return the document, or null if the id is marked as a miss or no backend returned it
 * @throws IOException
 */
@Override
public SolrDocument get(String id) throws IOException {
SolrDocument doc = this.documentCache.get(id);
// the miss marker takes precedence: even a cached document is not returned while it is set
if (this.missCache.containsKey(id)) return null;
if (doc != null) return doc;
if (this.solr0 != null) {
SolrDocument doc = this.solr0.get(id);
if (doc != null) return doc;
doc = this.solr0.get(id);
if (doc != null) {
this.hitCache.put(id, EXIST);
this.documentCache.put(id, doc);
return doc;
}
}
if (this.solr1 != null) {
SolrDocument doc = this.solr1.get(id);
if (doc != null) return doc;
doc = this.solr1.get(id);
if (doc != null) {
this.hitCache.put(id, EXIST);
this.documentCache.put(id, doc);
return doc;
}
}
// neither backend returned a document: remember the negative answer
this.missCache.put(id, EXIST);
return null;
}
@ -166,22 +227,34 @@ public class DoubleSolrConnector implements SolrConnector {
* @throws IOException
*/
@Override
public void add(final SolrDoc solrdoc) throws IOException {
public void add(final SolrInputDocument solrdoc) throws IOException {
    // write the document to every attached backend first; the caches are only
    // updated after the writes went through without an exception
    if (this.solr0 != null) {
        this.solr0.add(solrdoc);
    }
    if (this.solr1 != null) {
        this.solr1.add(solrdoc);
    }
    final String id = (String) solrdoc.getFieldValue(YaCySchema.id.name());
    if (id != null) {
        // a miss marker left behind by delete() or by a failed lookup would make get()
        // return null for this id although the document was just stored, so drop it
        this.missCache.remove(id);
        this.hitCache.put(id, EXIST);
        // the document cache holds SolrDocument objects, so the input document is converted
        this.documentCache.put(id, ClientUtils.toSolrDocument(solrdoc));
    }
}
/**
 * Adds every document of the collection to each attached backend, one document at a time,
 * and then records each document that carries an id in the hit cache and the document cache.
 * NOTE(review): the miss cache is not invalidated here; get() returns null for an id that
 * still has a miss marker (for example after a delete), even if the document was re-added.
 *
 * @param solrdocs the documents to store
 * @throws IOException
 * @throws SolrException
 */
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
if (this.solr0 != null) {
for (SolrDoc d: solrdocs) this.solr0.add(d);
for (SolrInputDocument d: solrdocs) this.solr0.add(d);
}
if (this.solr1 != null) {
for (SolrDoc d: solrdocs) this.solr1.add(d);
for (SolrInputDocument d: solrdocs) this.solr1.add(d);
}
// cache update runs after all backend writes
for (SolrInputDocument solrdoc: solrdocs) {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.name());
if (id != null) {
this.hitCache.put(id, EXIST);
// the document cache stores SolrDocument, so the input document is converted
this.documentCache.put(id, ClientUtils.toSolrDocument(solrdoc));
}
}
}
@ -193,7 +266,13 @@ public class DoubleSolrConnector implements SolrConnector {
*/
@Override
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
final SolrDocumentList list = new SolrDocumentList();
if (this.solr0 == null && this.solr1 == null) return list;
if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
SolrDocument doc = get(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3));
list.add(doc);
return list;
}
if (this.solr0 != null && this.solr1 == null) return this.solr0.query(querystring, offset, count);
if (this.solr1 != null && this.solr0 == null) return this.solr1.query(querystring, offset, count);
@ -211,10 +290,18 @@ public class DoubleSolrConnector implements SolrConnector {
}
// now use the size of the first query to do a second query
final SolrDocumentList list = new SolrDocumentList();
for (final SolrDocument d: l) list.add(d);
l = this.solr1.query(querystring, offset + l.size() - size0, count - l.size());
for (final SolrDocument d: l) list.add(d);
// add caching
for (final SolrDocument solrdoc: list) {
String id = (String) solrdoc.getFieldValue(YaCySchema.id.name());
if (id != null) {
this.hitCache.put(id, EXIST);
this.documentCache.put(id, solrdoc);
}
}
return list;
}

@ -28,19 +28,20 @@ import java.util.concurrent.ArrayBlockingQueue;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
public class MultipleSolrConnector implements SolrConnector {
private final static SolrDoc POISON_DOC = new SolrDoc();
private final static SolrInputDocument POISON_DOC = new SolrInputDocument();
private final ArrayBlockingQueue<SolrDoc> queue;
private final ArrayBlockingQueue<SolrInputDocument> queue;
private final AddWorker[] worker;
private final SolrConnector solr;
private int commitWithinMs;
public MultipleSolrConnector(final String url, int connections) throws IOException {
this.solr = new SingleSolrConnector(url);
this.queue = new ArrayBlockingQueue<SolrDoc>(1000);
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
this.worker = new AddWorker[connections];
this.commitWithinMs = 180000;
for (int i = 0; i < connections; i++) {
@ -57,7 +58,7 @@ public class MultipleSolrConnector implements SolrConnector {
}
@Override
public void run() {
SolrDoc doc;
SolrInputDocument doc;
try {
while ((doc = MultipleSolrConnector.this.queue.take()) != POISON_DOC) {
try {
@ -138,7 +139,7 @@ public class MultipleSolrConnector implements SolrConnector {
}
@Override
public void add(final SolrDoc solrdoc) throws IOException, SolrException {
public void add(final SolrInputDocument solrdoc) throws IOException, SolrException {
try {
this.queue.put(solrdoc);
} catch (InterruptedException e) {
@ -147,8 +148,8 @@ public class MultipleSolrConnector implements SolrConnector {
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
for (SolrDoc d: solrdocs) {
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
for (SolrInputDocument d: solrdocs) {
try {
this.queue.put(d);
} catch (InterruptedException e) {

@ -27,6 +27,7 @@ import java.util.List;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
public class RetrySolrConnector implements SolrConnector {
@ -133,7 +134,7 @@ public class RetrySolrConnector implements SolrConnector {
}
@Override
public void add(final SolrDoc solrdoc) throws IOException, SolrException {
public void add(final SolrInputDocument solrdoc) throws IOException, SolrException {
final long t = System.currentTimeMillis() + this.retryMaxTime;
Throwable ee = null;
while (System.currentTimeMillis() < t) try {
@ -148,8 +149,8 @@ public class RetrySolrConnector implements SolrConnector {
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
for (SolrDoc d: solrdocs) add(d);
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
    // each document goes through the single-document add of this connector
    for (final SolrInputDocument document : solrdocs) {
        this.add(document);
    }
}
@Override

@ -31,8 +31,11 @@ import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.search.index.YaCySchema;
import org.apache.solr.common.SolrInputDocument;
public class ShardSelection {
public final static Charset charsetUTF8;
@ -57,7 +60,7 @@ public class ShardSelection {
return (int) (this.chardID.getAndIncrement() % this.dimension);
}
public int select(final SolrDoc solrdoc) throws IOException {
public int select(final SolrInputDocument solrdoc) throws IOException {
if (this.method == Method.MODULO_HOST_MD5) {
final String sku = (String) solrdoc.getField(YaCySchema.sku.getSolrFieldName()).getValue();
return selectURL(sku);

@ -35,6 +35,7 @@ import net.yacy.cora.protocol.Domains;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
public class ShardSolrConnector implements SolrConnector {
@ -116,7 +117,7 @@ public class ShardSolrConnector implements SolrConnector {
}
return false;
}
@Override
public SolrDocument get(String id) throws IOException {
for (final SolrConnector connector: this.connectors) {
@ -132,13 +133,13 @@ public class ShardSolrConnector implements SolrConnector {
* @throws IOException
*/
@Override
public void add(final SolrDoc solrdoc) throws IOException {
public void add(final SolrInputDocument solrdoc) throws IOException {
    // the shard selection yields the index of the connector that receives this document
    final int shard = this.sharding.select(solrdoc);
    this.connectors.get(shard).add(solrdoc);
}
@Override
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException {
for (SolrDoc d: solrdocs) this.connectors.get(this.sharding.select(d)).add(d);
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
    // route every document on its own: the connector index is selected per document
    for (final SolrInputDocument document : solrdocs) {
        final int shard = this.sharding.select(document);
        this.connectors.get(shard).add(document);
    }
}
/**
@ -146,8 +147,8 @@ public class ShardSolrConnector implements SolrConnector {
* @param docs
* @throws IOException
*/
protected void addSolr(final Collection<SolrDoc> docs) throws IOException {
for (final SolrDoc doc: docs) add(doc);
protected void addSolr(final Collection<SolrInputDocument> docs) throws IOException {
    // delegate to the single-document add, one document at a time
    for (final SolrInputDocument document : docs) {
        this.add(document);
    }
}
/**

@ -31,6 +31,7 @@ import java.util.List;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
public interface SolrConnector {
@ -85,8 +86,8 @@ public interface SolrConnector {
* @throws IOException
* @throws SolrException
*/
public void add(final SolrDoc solrdoc) throws IOException, SolrException;
public void add(final Collection<SolrDoc> solrdocs) throws IOException, SolrException;
public void add(final SolrInputDocument solrdoc) throws IOException, SolrException;
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException;
/**
* get a document from solr by given id

@ -25,7 +25,6 @@ package net.yacy.kelondro.data.meta;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import net.yacy.cora.date.GenericFormatter;
@ -124,11 +123,16 @@ public class URIMetadataNode implements URIMetadata {
return x;
}
private ArrayList<Object> getArrayList(YaCySchema field) {
@SuppressWarnings({ "unchecked", "rawtypes" })
ArrayList<Object> x = (ArrayList) this.doc.getFieldValue(field.name());
if (x == null) return new ArrayList<Object>(0);
return x;
/**
 * Reads a field of the wrapped document as a list of strings.
 * A missing field gives an empty list, a value that already is an ArrayList is
 * returned as it is, and any other value is cast to String and wrapped in a new list.
 *
 * @param field the schema field to read
 * @return the field content as a list, never null
 */
@SuppressWarnings("unchecked")
private ArrayList<String> getArrayList(YaCySchema field) {
    final Object value = this.doc.getFieldValue(field.name());
    if (value instanceof ArrayList) {
        // instanceof is false for null, so the missing-field case falls through
        return (ArrayList<String>) value;
    }
    final ArrayList<String> wrapped = new ArrayList<String>(value == null ? 0 : 1);
    if (value != null) {
        wrapped.add((String) value);
    }
    return wrapped;
}
@Override
@ -158,10 +162,9 @@ public class URIMetadataNode implements URIMetadata {
/**
 * Returns the first value of the title field.
 *
 * @return the first title, or the empty string if the field has no value
 */
@Override
public String dc_title() {
@SuppressWarnings("unchecked")
List<String> titles = (List<String>) this.doc.getFieldValue(YaCySchema.title.name());
if (titles == null || titles.size() == 0) return "";
return titles.get(0);
ArrayList<String> a = getArrayList(YaCySchema.title);
if (a == null || a.size() == 0) return "";
return a.get(0);
}
@Override
@ -211,24 +214,24 @@ public class URIMetadataNode implements URIMetadata {
/**
 * Computes the document type character with Response.docType.
 *
 * @return the type computed from the first content_type value, or from the url of this
 *         entry if the content_type field has no value
 */
@Override
public char doctype() {
ArrayList<Object> a = getArrayList(YaCySchema.content_type);
ArrayList<String> a = getArrayList(YaCySchema.content_type);
if (a == null || a.size() == 0) return Response.docType(this.url);
return Response.docType((String) a.get(0));
return Response.docType(a.get(0));
}
/**
 * Returns the first value of the language_txt field as bytes.
 *
 * @return the first language value converted with UTF8.getBytes, or the bytes of "en"
 *         if the field has no value
 */
@Override
public byte[] language() {
ArrayList<Object> languages = getArrayList(YaCySchema.language_txt);
ArrayList<String> languages = getArrayList(YaCySchema.language_txt);
if (languages == null || languages.size() == 0) return ASCII.getBytes("en");
return UTF8.getBytes((String) languages.get(0));
return UTF8.getBytes(languages.get(0));
}
/**
 * Returns the first value of the referrer_id_txt field as bytes.
 *
 * @return the first referrer id converted with ASCII.getBytes, or null if the field has no value
 */
@Override
public byte[] referrerHash() {
ArrayList<Object> referrer = getArrayList(YaCySchema.referrer_id_txt);
ArrayList<String> referrer = getArrayList(YaCySchema.referrer_id_txt);
if (referrer == null || referrer.size() == 0) return null;
return ASCII.getBytes((String) referrer.get(0));
return ASCII.getBytes(referrer.get(0));
}
@Override

@ -38,6 +38,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.storage.HandleMap;
import net.yacy.cora.util.SpaceExceededException;
@ -487,7 +488,7 @@ public class Table implements Index, Iterable<Row.Entry> {
//assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size() + ", file = " + filename();
assert this.table == null || this.table.size() == this.index.size() : "table.size() = " + this.table.size() + ", index.size() = " + this.index.size() + ", file = " + filename();
e = get0(key);
assert e == null || this.rowdef.objectOrder.equal(key, e.getPrimaryKeyBytes());
assert e == null || this.rowdef.objectOrder.equal(key, e.getPrimaryKeyBytes()) : "key = " + ASCII.String(key) + ", e.k = " + ASCII.String(e.getPrimaryKeyBytes());
return e;
}
}

@ -40,7 +40,7 @@ import java.util.TreeSet;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.services.federated.solr.DoubleSolrConnector;
import net.yacy.cora.services.federated.solr.MirrorSolrConnector;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
@ -73,7 +73,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
private Export exportthread; // will have a export thread assigned if exporter is running
private String tablename;
private ArrayList<HostStat> statsDump;
private final DoubleSolrConnector solr;
private final MirrorSolrConnector solr;
private final SolrConfiguration solrScheme;
public MetadataRepository(final File path, final SolrConfiguration solrScheme) {
@ -82,7 +82,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
this.urlIndexFile = null;
this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null;
this.solr = new DoubleSolrConnector();
this.solr = new MirrorSolrConnector(10000, 10000, 1000);
this.solrScheme = solrScheme;
}

@ -55,13 +55,11 @@ import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.YaCySchema;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables;
@ -500,17 +498,11 @@ public class SnippetProcess {
String solrContent = null;
if (this.solr != null) {
SolrDocument sd = null;
StringBuilder querystring = new StringBuilder(17);
querystring.append(YaCySchema.id.getSolrFieldName()).append(':').append('"').append(ASCII.String(page.hash())).append('"');
SolrDocumentList sdl = null;
try {
sdl = this.solr.query(querystring.toString(), 0, 1);
sd = this.solr.get(ASCII.String(page.hash()));
} catch (IOException e) {
Log.logException(e);
}
if (sdl != null && !sdl.isEmpty()) {
sd = sdl.get(0);
}
if (sd != null) {
solrContent = Switchboard.getSwitchboard().index.getSolrScheme().solrGetText(sd);
}

Loading…
Cancel
Save