Redesign of the exists() query (it can now be called with a full Solr query) and of the
CachedSolrConnector, which based its cache on the key value. This will be
used to correct the title_unique_b and description_unique_b fields.
pull/1/head
Michael Peter Christen 12 years ago
parent 27d6222880
commit f24ac518e6

@ -41,7 +41,6 @@ import net.yacy.kelondro.util.Formatter;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -205,16 +204,15 @@ public class PerformanceMemory_p {
// other caching structures // other caching structures
final CachedSolrConnector solr = (CachedSolrConnector) Switchboard.getSwitchboard().index.fulltext().getDefaultConnector(); final CachedSolrConnector solr = (CachedSolrConnector) Switchboard.getSwitchboard().index.fulltext().getDefaultConnector();
final CachedSolrConnector.HitMissCache hitMissCache = solr.getCache(CollectionSchema.id.getSolrFieldName());
prop.putNum("solrcacheHit.size", solr.nameCacheHitSize()); prop.putNum("solrcacheHit.size", solr.nameCacheHitSize());
prop.putNum("solrcacheHit.Hit", hitMissCache.hitCache_Hit); prop.putNum("solrcacheHit.Hit", solr.hitCache_Hit);
prop.putNum("solrcacheHit.Miss", hitMissCache.hitCache_Miss); prop.putNum("solrcacheHit.Miss", solr.hitCache_Miss);
prop.putNum("solrcacheHit.Insert", hitMissCache.hitCache_Insert); prop.putNum("solrcacheHit.Insert", solr.hitCache_Insert);
prop.putNum("solrcacheMiss.size", solr.nameCacheMissSize()); prop.putNum("solrcacheMiss.size", solr.nameCacheMissSize());
prop.putNum("solrcacheMiss.Hit", hitMissCache.missCache_Hit); prop.putNum("solrcacheMiss.Hit", solr.missCache_Hit);
prop.putNum("solrcacheMiss.Miss", hitMissCache.missCache_Miss); prop.putNum("solrcacheMiss.Miss", solr.missCache_Miss);
prop.putNum("solrcacheMiss.Insert", hitMissCache.missCache_Insert); prop.putNum("solrcacheMiss.Insert", solr.missCache_Insert);
prop.putNum("solrcacheDocument.size", solr.nameCacheDocumentSize()); prop.putNum("solrcacheDocument.size", solr.nameCacheDocumentSize());
prop.putNum("solrcacheDocument.Hit", solr.documentCache_Hit); prop.putNum("solrcacheDocument.Hit", solr.documentCache_Hit);

@ -67,11 +67,14 @@ public abstract class AbstractSolrConnector implements SolrConnector {
} }
private final static int pagesize = 100; private final static int pagesize = 100;
public static String idQuery(String id) {
return CollectionSchema.id.getSolrFieldName() + ":\"" + id + "\"";
}
@Override @Override
public boolean exists(final String fieldName, final String key) throws IOException { public boolean existsByQuery(final String query) throws IOException {
if (fieldName == null) return false;
try { try {
long count = getQueryCount(fieldName + ":\"" + key + "\""); long count = getQueryCount(query);
return count > 0; return count > 0;
} catch (final Throwable e) { } catch (final Throwable e) {
return false; return false;

@ -24,7 +24,6 @@ import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.storage.ARC; import net.yacy.cora.storage.ARC;
@ -45,50 +44,23 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
private final static Object EXIST = new Object(); private final static Object EXIST = new Object();
private SolrConnector solr; private SolrConnector solr;
private int hitCacheMax, missCacheMax, partitions;
private final Map<String, HitMissCache> hitMissCache;
private final ARC<String, SolrDocument> documentCache; private final ARC<String, SolrDocument> documentCache;
public final ARC<String, Object> hitCache, missCache;
public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write
public long hitCache_Hit = 0, hitCache_Miss = 0, hitCache_Insert = 0; // for statistics only; do not write
public long missCache_Hit = 0, missCache_Miss = 0, missCache_Insert = 0; // for statistics only; do not write
public static class HitMissCache {
public final ARC<String, Object> hitCache, missCache;
public long hitCache_Hit = 0, hitCache_Miss = 0, hitCache_Insert = 0; // for statistics only; do not write
public long missCache_Hit = 0, missCache_Miss = 0, missCache_Insert = 0; // for statistics only; do not write
public HitMissCache(int hitCacheMax, int missCacheMax, int partitions) {
this.hitCache = new ConcurrentARC<String, Object>(hitCacheMax, partitions);
this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
}
public void clearCache() {
this.hitCache.clear();
this.missCache.clear();
}
}
public CachedSolrConnector(SolrConnector c, int hitCacheMax, int missCacheMax, int docCacheMax) { public CachedSolrConnector(SolrConnector c, int hitCacheMax, int missCacheMax, int docCacheMax) {
this.solr = c; this.solr = c;
this.hitCacheMax = hitCacheMax; int partitions = Runtime.getRuntime().availableProcessors() * 2;
this.missCacheMax = missCacheMax; this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, partitions);
this.partitions = Runtime.getRuntime().availableProcessors() * 2; this.hitCache = new ConcurrentARC<String, Object>(hitCacheMax, partitions);
this.hitMissCache = new ConcurrentHashMap<String, HitMissCache>(); this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
this.documentCache = new ConcurrentARC<String, SolrDocument>(docCacheMax, this.partitions);
}
public HitMissCache getCache(String field) {
HitMissCache c = this.hitMissCache.get(field);
if (c == null) {
c = new HitMissCache(this.hitCacheMax, this.missCacheMax, this.partitions);
this.hitMissCache.put(field, c);
}
return c;
} }
public void clearCache() { public void clearCache() {
for (HitMissCache c: hitMissCache.values()) c.clearCache(); this.hitCache.clear();
this.missCache.clear();
this.documentCache.clear(); this.documentCache.clear();
if (this.solr != null) this.solr.commit(true); if (this.solr != null) this.solr.commit(true);
} }
@ -116,13 +88,13 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException * @throws IOException
*/ */
@Override @Override
public void delete(final String id) throws IOException { public void deleteById(final String id) throws IOException {
this.documentCache.remove(id); String q = idQuery(id);
HitMissCache c = getCache("id"); this.documentCache.remove(q);
c.hitCache.remove(id); this.hitCache.remove(q);
c.missCache.put(id, EXIST); this.missCache.put(q, EXIST);
c.missCache_Insert++; this.missCache_Insert++;
if (this.solr != null) this.solr.delete(id); if (this.solr != null) this.solr.deleteByQuery(q);
} }
/** /**
@ -131,15 +103,15 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException * @throws IOException
*/ */
@Override @Override
public void delete(final List<String> ids) throws IOException { public void deleteByIds(final List<String> ids) throws IOException {
for (String id: ids) { for (String id: ids) {
this.documentCache.remove(id); String q = idQuery(id);
HitMissCache c = getCache("id"); this.documentCache.remove(q);
c.hitCache.remove(id); this.hitCache.remove(q);
c.missCache.put(id, EXIST); this.missCache.put(q, EXIST);
c.missCache_Insert++; this.missCache_Insert++;
} }
if (this.solr != null) this.solr.delete(ids); if (this.solr != null) this.solr.deleteByIds(ids);
} }
@Override @Override
@ -149,61 +121,60 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
} }
@Override @Override
public boolean exists(final String fieldName, final String key) throws IOException { public boolean existsByQuery(final String query) throws IOException {
HitMissCache c = getCache(fieldName); if (this.hitCache.containsKey(query)) {
if (c.hitCache.containsKey(key)) { this.hitCache_Hit++;
c.hitCache_Hit++;
return true; return true;
} }
c.hitCache_Miss++; this.hitCache_Miss++;
if (this.documentCache.containsKey(key)) { if (this.documentCache.containsKey(query)) {
this.documentCache_Hit++; this.documentCache_Hit++;
return true; return true;
} }
this.documentCache_Miss++; this.documentCache_Miss++;
if (c.missCache.containsKey(key)) { if (this.missCache.containsKey(query)) {
c.missCache_Hit++; this.missCache_Hit++;
return false; return false;
} }
c.missCache_Miss++; this.missCache_Miss++;
if (solr != null && solr.exists(fieldName, key)) { if (solr != null && solr.existsByQuery(query)) {
c.missCache.remove(key); this.missCache.remove(query);
c.hitCache.put(key, EXIST); this.hitCache.put(query, EXIST);
c.hitCache_Insert++; this.hitCache_Insert++;
return true; return true;
} }
c.missCache.put(key, EXIST); this.missCache.put(query, EXIST);
c.missCache_Insert++; this.missCache_Insert++;
return false; return false;
} }
@Override @Override
public SolrDocument getById(final String key, final String ... fields) throws IOException { public SolrDocument getById(final String id, final String ... fields) throws IOException {
SolrDocument doc = fields.length == 0 ? this.documentCache.get(key) : null; String q = idQuery(id);
SolrDocument doc = fields.length == 0 ? this.documentCache.get(q) : null;
if (doc != null) { if (doc != null) {
this.documentCache_Hit++; this.documentCache_Hit++;
return doc; return doc;
} }
documentCache_Miss++; documentCache_Miss++;
HitMissCache c = this.getCache(CollectionSchema.id.getSolrFieldName()); if (this.missCache.containsKey(q)) {
if (c.missCache.containsKey(key)) { this.missCache_Hit++;
c.missCache_Hit++;
return null; return null;
} }
c.missCache_Miss++; this.missCache_Miss++;
if (solr != null && ((doc = solr.getById(key, fields)) != null)) { if (solr != null && ((doc = solr.getById(id, fields)) != null)) {
addToCache(doc, fields.length == 0); addToCache(doc, fields.length == 0);
return doc; return doc;
} }
// check if there is a autocommit problem // check if there is a autocommit problem
if (c.hitCache.containsKey(key)) { if (this.hitCache.containsKey(q)) {
// the document should be there, therefore make a commit and check again // the document should be there, therefore make a commit and check again
if (solr != null && ((doc = solr.getById(key, fields)) != null)) { if (solr != null && ((doc = solr.getById(id, fields)) != null)) {
addToCache(doc, fields.length == 0); addToCache(doc, fields.length == 0);
} }
} }
c.missCache.put(key, EXIST); this.missCache.put(q, EXIST);
c.missCache_Insert++; this.missCache_Insert++;
return null; return null;
} }
@ -217,9 +188,10 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()); String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName());
assert id != null; assert id != null;
if (id == null) return; if (id == null) return;
String q = idQuery(id);
SolrDocument doc = ClientUtils.toSolrDocument(solrdoc); SolrDocument doc = ClientUtils.toSolrDocument(solrdoc);
addToCache(doc, true); addToCache(doc, true);
this.documentCache.put(id, doc); this.documentCache.put(q, doc);
this.documentCache_Insert++; this.documentCache_Insert++;
if (this.solr != null) this.solr.add(solrdoc); if (this.solr != null) this.solr.add(solrdoc);
} }
@ -230,9 +202,10 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()); String id = (String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName());
assert id != null; assert id != null;
if (id == null) continue; if (id == null) continue;
String q = idQuery(id);
SolrDocument doc = ClientUtils.toSolrDocument(solrdoc); SolrDocument doc = ClientUtils.toSolrDocument(solrdoc);
addToCache(doc, true); addToCache(doc, true);
this.documentCache.put(id, doc); this.documentCache.put(q, doc);
this.documentCache_Insert++; this.documentCache_Insert++;
} }
if (this.solr != null) this.solr.add(solrdocs); if (this.solr != null) this.solr.add(solrdocs);
@ -292,41 +265,29 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
} }
private void addToCache(SolrDocument doc, boolean doccach) { private void addToCache(SolrDocument doc, boolean doccach) {
for (Map.Entry<String, HitMissCache> e: this.hitMissCache.entrySet()) { String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
Object keyo = doc.getFieldValue(e.getKey()); String q = idQuery(id);
String key = null; this.missCache.remove(q);
if (keyo instanceof String) key = (String) keyo; this.hitCache.put(q, EXIST);
if (keyo instanceof Integer) key = ((Integer) keyo).toString(); this.hitCache_Insert++;
if (keyo instanceof Long) key = ((Long) keyo).toString();
if (key != null) {
HitMissCache c = e.getValue();
c.missCache.remove(key);
c.hitCache.put(key, EXIST);
c.hitCache_Insert++;
}
}
if (doccach) { if (doccach) {
this.documentCache.put((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()), doc); this.documentCache.put(q, doc);
this.documentCache_Insert++; this.documentCache_Insert++;
} }
} }
@Override @Override
public long getSize() { public long getSize() {
long s = this.solr.getSize(); long s = this.solr.getSize();
HitMissCache c = getCache("id"); return Math.max(this.documentCache.size(), Math.max(this.hitCache.size(), s)); // this might be incorrect if there are other requests than "id:.." in the cache
return Math.max(this.documentCache.size(), Math.max(c.hitCache.size(), s));
} }
public int nameCacheHitSize() { public int nameCacheHitSize() {
HitMissCache c = getCache("id"); return this.hitCache.size();
return c.hitCache.size();
} }
public int nameCacheMissSize() { public int nameCacheMissSize() {
HitMissCache c = getCache("id"); return this.missCache.size();
return c.missCache.size();
} }
public int nameCacheDocumentSize() { public int nameCacheDocumentSize() {

@ -122,9 +122,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException * @throws IOException
*/ */
@Override @Override
public void delete(final String id) throws IOException { public void deleteById(final String id) throws IOException {
if (this.solr0 != null) this.solr0.delete(id); if (this.solr0 != null) this.solr0.deleteById(id);
if (this.solr1 != null) this.solr1.delete(id); if (this.solr1 != null) this.solr1.deleteById(id);
} }
/** /**
@ -133,9 +133,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
* @throws IOException * @throws IOException
*/ */
@Override @Override
public void delete(final List<String> ids) throws IOException { public void deleteByIds(final List<String> ids) throws IOException {
if (this.solr0 != null) this.solr0.delete(ids); if (this.solr0 != null) this.solr0.deleteByIds(ids);
if (this.solr1 != null) this.solr1.delete(ids); if (this.solr1 != null) this.solr1.deleteByIds(ids);
} }
@Override @Override
@ -145,8 +145,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
} }
@Override @Override
public boolean exists(final String fieldName, final String key) throws IOException { public boolean existsByQuery(final String query) throws IOException {
if ((solr0 != null && solr0.exists(fieldName, key)) || (solr1 != null && solr1.exists(fieldName, key))) { if ((solr0 != null && solr0.existsByQuery(query)) || (solr1 != null && solr1.existsByQuery(query))) {
return true; return true;
} }
return false; return false;

@ -64,14 +64,14 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @param id the url hash of the entry * @param id the url hash of the entry
* @throws IOException * @throws IOException
*/ */
public void delete(final String id) throws IOException; public void deleteById(final String id) throws IOException;
/** /**
* delete a set of entries from solr; entries are identified by their url hash * delete a set of entries from solr; entries are identified by their url hash
* @param ids a list of url hashes * @param ids a list of url hashes
* @throws IOException * @throws IOException
*/ */
public void delete(final List<String> ids) throws IOException; public void deleteByIds(final List<String> ids) throws IOException;
/** /**
* delete entries from solr according the given solr query string * delete entries from solr according the given solr query string
@ -87,7 +87,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @return true if any entry in solr exists * @return true if any entry in solr exists
* @throws IOException * @throws IOException
*/ */
public boolean exists(final String fieldName, final String key) throws IOException; public boolean existsByQuery(final String solrquery) throws IOException;
/** /**
* add a solr input document * add a solr input document

@ -119,7 +119,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} }
@Override @Override
public synchronized void delete(final String id) throws IOException { public synchronized void deleteById(final String id) throws IOException {
try { try {
this.server.deleteById(id, -1); this.server.deleteById(id, -1);
} catch (final Throwable e) { } catch (final Throwable e) {
@ -128,7 +128,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
} }
@Override @Override
public synchronized void delete(final List<String> ids) throws IOException { public synchronized void deleteByIds(final List<String> ids) throws IOException {
try { try {
this.server.deleteById(ids, -1); this.server.deleteById(ids, -1);
} catch (final Throwable e) { } catch (final Throwable e) {

@ -686,7 +686,7 @@ public final class Fulltext {
try { try {
synchronized (Fulltext.this.solrInstances) { synchronized (Fulltext.this.solrInstances) {
for (byte[] urlHash: deleteIDs) { for (byte[] urlHash: deleteIDs) {
Fulltext.this.getDefaultConnector().delete(ASCII.String(urlHash)); Fulltext.this.getDefaultConnector().deleteById(ASCII.String(urlHash));
Fulltext.this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\""); Fulltext.this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\"");
} }
Fulltext.this.commit(true); Fulltext.this.commit(true);
@ -708,7 +708,7 @@ public final class Fulltext {
if (urlHash == null) return false; if (urlHash == null) return false;
try { try {
synchronized (this.solrInstances) { synchronized (this.solrInstances) {
this.getDefaultConnector().delete(ASCII.String(urlHash)); this.getDefaultConnector().deleteById(ASCII.String(urlHash));
this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\""); this.getWebgraphConnector().deleteByQuery(WebgraphSchema.source_id_s.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\"");
} }
} catch (final Throwable e) { } catch (final Throwable e) {
@ -733,7 +733,7 @@ public final class Fulltext {
if (urlHash.equals(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()))) return true; if (urlHash.equals(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()))) return true;
} }
try { try {
if (this.getDefaultConnector().exists(CollectionSchema.id.getSolrFieldName(), urlHash)) return true; if (this.getDefaultConnector().existsByQuery(AbstractSolrConnector.idQuery(urlHash))) return true;
} catch (final Throwable e) { } catch (final Throwable e) {
Log.logException(e); Log.logException(e);
} }

@ -502,7 +502,7 @@ public class Segment {
// lookup the document with the same signature // lookup the document with the same signature
long signature = ((Long) vector.getField(checkfield.getSolrFieldName()).getValue()).longValue(); long signature = ((Long) vector.getField(checkfield.getSolrFieldName()).getValue()).longValue();
try { try {
if (this.fulltext.getDefaultConnector().exists(checkfield.getSolrFieldName(), Long.toString(signature))) { if (this.fulltext.getDefaultConnector().existsByQuery(checkfield.getSolrFieldName() + ":\"" + Long.toString(signature) + "\"")) {
// change unique attribut in content // change unique attribut in content
vector.setField(uniquefield.getSolrFieldName(), false); vector.setField(uniquefield.getSolrFieldName(), false);
} }
@ -525,7 +525,7 @@ public class Segment {
} }
checkstring = ClientUtils.escapeQueryChars("\"" + checkstring + "\""); checkstring = ClientUtils.escapeQueryChars("\"" + checkstring + "\"");
try { try {
if (this.fulltext.getDefaultConnector().exists(checkfield.getSolrFieldName(), checkstring)) { if (this.fulltext.getDefaultConnector().existsByQuery(checkfield.getSolrFieldName() + ":\"" + checkstring + "\"")) {
// switch unique attribute in new document // switch unique attribute in new document
vector.setField(uniquefield.getSolrFieldName(), false); vector.setField(uniquefield.getSolrFieldName(), false);
// switch attribute also in all existing documents (which should be exactly only one!) // switch attribute also in all existing documents (which should be exactly only one!)

Loading…
Cancel
Save