From b7004043ead54eb17ac047e6ecbaf623683da2c4 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 24 Nov 2012 22:30:05 +0100 Subject: [PATCH] - added a field cache for solr queries which call only for a single value - fixed a version conflict exception within a solr add request --- htroot/PerformanceMemory_p.java | 2 +- htroot/js/Crawler.js | 2 +- .../solr/connector/AbstractSolrConnector.java | 7 ++ .../solr/connector/MirrorSolrConnector.java | 65 +++++++++++++++---- .../solr/connector/MultipleSolrConnector.java | 5 ++ .../solr/connector/SolrConnector.java | 9 +++ .../solr/connector/SolrServerConnector.java | 27 ++++++-- source/net/yacy/search/index/Fulltext.java | 31 +++------ .../yacy/search/index/SolrConfiguration.java | 9 ++- 9 files changed, 114 insertions(+), 43 deletions(-) diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 3b428d9af..d7a51aa1e 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -205,7 +205,7 @@ public class PerformanceMemory_p { // other caching structures final MirrorSolrConnector solr = (MirrorSolrConnector) Switchboard.getSwitchboard().index.fulltext().getSolr(); - final MirrorSolrConnector.HitMissCache hitMissCache = solr.getCache(YaCySchema.id.getSolrFieldName()); + final MirrorSolrConnector.HitMissCache hitMissCache = solr.getHitMissCache(YaCySchema.id.getSolrFieldName()); prop.putNum("solrcacheHit.size", solr.nameCacheHitSize()); prop.putNum("solrcacheHit.Hit", hitMissCache.hitCache_Hit); prop.putNum("solrcacheHit.Miss", hitMissCache.hitCache_Miss); diff --git a/htroot/js/Crawler.js b/htroot/js/Crawler.js index 1fba3b457..208ba6b1e 100644 --- a/htroot/js/Crawler.js +++ b/htroot/js/Crawler.js @@ -74,7 +74,7 @@ function handleStatus(){ var ppmSpan = document.getElementById("ppmSpan"); removeAllChildren(ppmSpan); - for(i = 0; i < ppm / 10; i++){ + for(i = 0; i < ppm / 25; i++){ img=document.createElement("img"); img.setAttribute("src", BAR_IMG1); ppmSpan.appendChild(img); diff --git a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java index fdf386d9d..087e3cd6c 100644 --- a/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/AbstractSolrConnector.java @@ -73,6 +73,13 @@ public abstract class AbstractSolrConnector implements SolrConnector { return false; } } + + @Override + public Object getFieldById(final String key, final String field) throws IOException { + SolrDocument doc = getById(key, field); + if (doc == null) return null; + return doc.getFieldValue(field); + } /** * Get a query result from solr as a stream of documents. diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index f6fa85a8b..0489b9925 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import net.yacy.cora.federate.solr.YaCySchema; @@ -59,6 +60,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo private SolrConnector solr1; private int hitCacheMax, missCacheMax, partitions; private final Map hitMissCache; + private final Map> fieldCache; // a map from a field name to a id-key/value object cache private final ARC documentCache; public long documentCache_Hit = 0, documentCache_Miss = 0, documentCache_Insert = 0; // for statistics only; do not write @@ -86,12 +88,13 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo this.hitCacheMax = hitCacheMax; this.missCacheMax = missCacheMax; this.partitions = Runtime.getRuntime().availableProcessors() * 2; - this.hitMissCache = new HashMap(); + this.hitMissCache = new ConcurrentHashMap(); + this.fieldCache = new ConcurrentHashMap>(); this.documentCache = new ConcurrentARC(docCacheMax, this.partitions); } - public HitMissCache getCache(String field) { + public HitMissCache getHitMissCache(String field) { HitMissCache c = this.hitMissCache.get(field); if (c == null) { c = new HitMissCache(this.hitCacheMax, this.missCacheMax, this.partitions); @@ -100,6 +103,15 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo return c; } + public ARC getFieldCache(String field) { + ARC c = this.fieldCache.get(field); + if (c == null) { + c = new ConcurrentARC(this.hitCacheMax, this.partitions); + this.fieldCache.put(field, c); + } + return c; + } + public boolean isConnected0() { return this.solr0 != null; } @@ -137,7 +149,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo } public void clearCache() { - for (HitMissCache c: hitMissCache.values()) c.clearCache(); + for (HitMissCache c: this.hitMissCache.values()) c.clearCache(); + for (ARC c: this.fieldCache.values()) c.clear(); this.documentCache.clear(); } @@ -189,7 +202,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo @Override public void delete(final String id) throws IOException { this.documentCache.remove(id); - HitMissCache c = getCache("id"); + HitMissCache c = getHitMissCache("id"); c.hitCache.remove(id); c.missCache.put(id, EXIST); c.missCache_Insert++; @@ -206,7 +219,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo public void delete(final List ids) throws IOException { for (String id: ids) { this.documentCache.remove(id); - HitMissCache c = getCache("id"); + HitMissCache c = getHitMissCache("id"); c.hitCache.remove(id); c.missCache.put(id, EXIST); c.missCache_Insert++; @@ -226,7 +239,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo @Override public boolean exists(final String fieldName, final String key) throws IOException { - HitMissCache c = getCache(fieldName); + HitMissCache c = getHitMissCache(fieldName); if (c.hitCache.containsKey(key)) { c.hitCache_Hit++; return true; @@ -252,16 +265,32 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo c.missCache_Insert++; return false; } + + @Override + public Object getFieldById(final String key, final String field) throws IOException { + // try to get this from this cache + ARC c = getFieldCache(field); + Object o = c.get(key); + if (o != null) return o; + + // load the document + o = super.getFieldById(key, field); + + // use result to fill the cache + if (o == null) return null; + c.put(key, o); + return o; + } @Override public SolrDocument getById(final String key, final String ... fields) throws IOException { - SolrDocument doc = fields.length == 0 ? this.documentCache.get(key) : null; + SolrDocument doc = this.documentCache.get(key); if (doc != null) { this.documentCache_Hit++; return doc; } documentCache_Miss++; - HitMissCache c = this.getCache(YaCySchema.id.getSolrFieldName()); + HitMissCache c = this.getHitMissCache(YaCySchema.id.getSolrFieldName()); if (c.missCache.containsKey(key)) { c.missCache_Hit++; return null; @@ -277,6 +306,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo this.commit(); if ((solr0 != null && ((doc = solr0.getById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getById(key, fields)) != null))) { addToCache(doc, fields.length == 0); + return doc; } } c.missCache.put(key, EXIST); @@ -467,6 +497,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo private void addToCache(SolrDocument doc, boolean doccach) { for (Map.Entry e: this.hitMissCache.entrySet()) { Object keyo = doc.getFieldValue(e.getKey()); + if (keyo == null) continue; String key = null; if (keyo instanceof String) key = (String) keyo; if (keyo instanceof Integer) key = ((Integer) keyo).toString(); @@ -478,29 +509,37 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo c.hitCache_Insert++; } } + + String id = (String) doc.getFieldValue(YaCySchema.id.getSolrFieldName()); + if (id != null) { + for (Map.Entry> e: this.fieldCache.entrySet()) { + Object keyo = doc.getFieldValue(e.getKey()); + if (keyo != null) e.getValue().put(id, keyo); + } + if (doccach) { - this.documentCache.put((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName()), doc); + this.documentCache.put(id, doc); this.documentCache_Insert++; } + } } - @Override public long getSize() { long s = 0; if (this.solr0 != null) s += this.solr0.getSize(); if (this.solr1 != null) s += this.solr1.getSize(); - HitMissCache c = getCache("id"); + HitMissCache c = getHitMissCache("id"); return Math.max(this.documentCache.size(), Math.max(c.hitCache.size(), s)); } public int nameCacheHitSize() { - HitMissCache c = getCache("id"); + HitMissCache c = getHitMissCache("id"); return c.hitCache.size(); } public int nameCacheMissSize() { - HitMissCache c = getCache("id"); + HitMissCache c = getHitMissCache("id"); return c.missCache.size(); } diff --git a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java index 6a5dd6143..644db8a87 100644 --- a/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MultipleSolrConnector.java @@ -143,6 +143,11 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr return this.solr.deleteByQuery(querystring); } + @Override + public Object getFieldById(final String key, final String field) throws IOException { + return this.solr.getFieldById(key, field); + } + @Override public SolrDocument getById(final String key, final String ... fields) throws IOException { return this.solr.getById(key, fields); diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index e6df1e59d..639d7ccbc 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -105,6 +105,15 @@ public interface SolrConnector extends Iterable /* Iterable of document public void add(final SolrInputDocument solrdoc) throws IOException, SolrException; public void add(final Collection solrdocs) throws IOException, SolrException; + /** + * get a field value from solr by given key for the id-field and a field name + * @param key + * @param field one field + * @return one result or null if no result exists + * @throws IOException + */ + public Object getFieldById(final String key, final String field) throws IOException; + /** * get a document from solr by given key for the id-field * @param key diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index 3946d3eb1..2b85f47fc 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -109,6 +109,14 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen @Override public long getSize() { + /* + if (this.server instanceof EmbeddedSolrServer) { + EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server; + CoreContainer coreContainer = ess.getCoreContainer(); + String coreName = coreContainer.getDefaultCoreName(); + SolrCore core = coreContainer.getCore(coreName); + } + */ try { final QueryResponse rsp = query(AbstractSolrConnector.catchSuccessQuery); if (rsp == null) return 0; @@ -190,7 +198,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen try { synchronized (this.server) { this.server.request(up); - this.server.commit(); + //this.server.commit(); } } catch (final Throwable e) { throw new IOException(e); @@ -204,11 +212,20 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen synchronized (this.server) { //this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName())); this.server.add(solrdoc, this.commitWithinMs); - //this.server.commit(); } } catch (SolrServerException e) { - log.warn(e.getMessage() + " DOC=" + solrdoc.toString()); - throw new IOException(e); + // ok try this again and delete the document in advance + try { + this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName())); + } catch (SolrServerException e1) {} + try { + synchronized (this.server) { + this.server.add(solrdoc, this.commitWithinMs); + } + } catch (SolrServerException ee) { + log.warn(e.getMessage() + " DOC=" + solrdoc.toString()); + throw new IOException(ee); + } } } @@ -315,7 +332,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen abstract public QueryResponse query(ModifiableSolrParams params) throws IOException; private final char[] queryIDTemplate = "id:\" \"".toCharArray(); - + @Override public SolrDocument getById(final String key, final String ... fields) throws IOException { final SolrQuery query = new SolrQuery(); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 997800ebb..214caa078 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -215,29 +215,23 @@ public final class Fulltext implements Iterable { public Date getLoadDate(final String urlHash) { if (urlHash == null) return null; - SolrDocument doc; + Date x; try { - doc = this.solr.getById(urlHash, YaCySchema.load_date_dt.getSolrFieldName()); + x = (Date) this.solr.getFieldById(urlHash, YaCySchema.load_date_dt.getSolrFieldName()); } catch (IOException e) { return null; } - if (doc == null) return null; - Date x = (Date) doc.getFieldValue(YaCySchema.load_date_dt.getSolrFieldName()); - if (x == null) return new Date(0); - Date now = new Date(); - return x.after(now) ? now : x; + return x; } public DigestURI getURL(final byte[] urlHash) { if (urlHash == null) return null; - SolrDocument doc; + String x; try { - doc = this.solr.getById(ASCII.String(urlHash), YaCySchema.sku.getSolrFieldName()); + x = (String) this.solr.getFieldById(ASCII.String(urlHash), YaCySchema.sku.getSolrFieldName()); } catch (IOException e) { return null; } - if (doc == null) return null; - String x = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()); if (x == null) return null; try { DigestURI uri = new DigestURI(x, urlHash); @@ -291,13 +285,9 @@ public final class Fulltext implements Iterable { byte[] idb = ASCII.getBytes(id); try { if (this.urlIndexFile != null) this.urlIndexFile.remove(idb); - SolrDocument sd = this.solr.getById(id, YaCySchema.last_modified.getSolrFieldName()); - Date now = new Date(); - Date sdDate = sd == null ? null : URIMetadataNode.getDate(sd, YaCySchema.last_modified); - if (sdDate == null || sdDate.after(now)) sdDate = now; - Date docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified); - if (docDate.after(now)) docDate = now; - if (sd == null || sdDate.before(docDate)) { + Date sdDate = (Date) this.solr.getFieldById(id, YaCySchema.last_modified.getSolrFieldName()); + Date docDate = null; + if (sdDate == null || (docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified)) == null || sdDate.before(docDate)) { if (this.solrScheme.contains(YaCySchema.ip_s)) { // ip_s needs a dns lookup which causes blockings during search here this.solr.add(doc); @@ -492,9 +482,8 @@ public final class Fulltext implements Iterable { public String failReason(final String urlHash) throws IOException { if (urlHash == null) return null; - SolrDocument doc = this.solr.getById(urlHash, YaCySchema.failreason_t.getSolrFieldName()); - if (doc == null) return null; - String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.getSolrFieldName()); + String reason = (String) this.solr.getFieldById(urlHash, YaCySchema.failreason_t.getSolrFieldName()); + if (reason == null) return null; return reason == null ? null : reason.length() == 0 ? null : reason; } diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 7ce8d31dd..2ae03282b 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -383,8 +383,13 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable } if (allAttr || contains(YaCySchema.text_t)) add(doc, YaCySchema.text_t, content); if (allAttr || contains(YaCySchema.wordcount_i)) { - final int contentwc = content.split(" ").length; - add(doc, YaCySchema.wordcount_i, contentwc); + if (content.length() == 0) { + add(doc, YaCySchema.wordcount_i, 0); + } else { + int contentwc = 1; + for (int i = content.length() - 1; i >= 0; i--) if (content.charAt(i) == ' ') contentwc++; + add(doc, YaCySchema.wordcount_i, contentwc); + } } if (allAttr || contains(YaCySchema.synonyms_sxt)) { List synonyms = condenser.synonyms();