From 81bb50118e047e927c67f69c2631ac435066f1cc Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Thu, 7 Nov 2013 10:01:44 +0100 Subject: [PATCH] found and fixed a huge memory leak in solr caching (inside Solr). The not-flushed Solr cache is now handled in this way: - it is smaller by default - a Solr-internal process is started to flush the cache periodically (this does NOT clean the cache, just removes old objects) - a Solr-external process (the standard YaCy cleanup-process) now has direct access to the Solr-internal caches and flushes them completely. The time frame for such a flush is defined by the cleanup-process frequency, by default 10 minutes. --- defaults/solr/solrconfig.xml | 39 +++++++++++-------- htroot/ContentAnalysis_p.java | 2 +- htroot/RankingSolr_p.java | 2 +- htroot/yacysearch.java | 2 +- .../solr/connector/CachedSolrConnector.java | 10 ++--- .../ConcurrentUpdateSolrConnector.java | 6 +++ .../solr/connector/EmbeddedSolrConnector.java | 21 ++++++++++ .../solr/connector/MirrorSolrConnector.java | 6 +++ .../solr/connector/RemoteSolrConnector.java | 6 +++ .../solr/connector/SolrConnector.java | 7 +++- .../solr/connector/SolrServerConnector.java | 2 +- .../solr/instance/InstanceMirror.java | 5 +-- source/net/yacy/search/ResourceObserver.java | 2 +- source/net/yacy/search/Switchboard.java | 2 +- source/net/yacy/search/index/Fulltext.java | 14 +++---- source/net/yacy/search/index/Segment.java | 4 +- 16 files changed, 89 insertions(+), 41 deletions(-) diff --git a/defaults/solr/solrconfig.xml b/defaults/solr/solrconfig.xml index 1234dd0d3..d8044f969 100644 --- a/defaults/solr/solrconfig.xml +++ b/defaults/solr/solrconfig.xml @@ -461,19 +461,21 @@ and old cache. 
--> + size="64" + initialSize="64" + autowarmCount="4" + cleanupThread="true"/> - + - + diff --git a/htroot/ContentAnalysis_p.java b/htroot/ContentAnalysis_p.java index 2ba573ab0..eed8455e7 100644 --- a/htroot/ContentAnalysis_p.java +++ b/htroot/ContentAnalysis_p.java @@ -34,7 +34,7 @@ public class ContentAnalysis_p { // clean up all search events SearchEventCache.cleanupEvents(true); - sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings + sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings if (post != null && post.containsKey("EnterDoublecheck")) { Ranking.setMinTokenLen(post.getInt("minTokenLen", 3)); diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java index 04784f938..91e543a11 100644 --- a/htroot/RankingSolr_p.java +++ b/htroot/RankingSolr_p.java @@ -38,7 +38,7 @@ public class RankingSolr_p { // clean up all search events SearchEventCache.cleanupEvents(true); - sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings + sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings int profileNr = 0; if (post != null) profileNr = post.getInt("profileNr", profileNr); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 284a0b15e..b79c8061b 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -360,7 +360,7 @@ public class yacysearch { // check available memory and clean up if necessary if ( !MemoryControl.request(8000000L, false) ) { - indexSegment.clearCache(); + indexSegment.clearCaches(); SearchEventCache.cleanupEvents(false); } diff --git a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java index c96fe2d33..eaf93603c 100644 --- a/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/CachedSolrConnector.java @@ -61,7 
+61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo this.missCache = new ConcurrentARC(missCacheMax, partitions); } - public void clearCache() { + public void clearCaches() { this.hitCache.clear(); this.missCache.clear(); this.documentCache.clear(); @@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo @Override public synchronized void close() { + this.clearCaches(); if (this.solr != null) this.solr.close(); this.solr = null; - this.clearCache(); } /** @@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo */ @Override public void clear() throws IOException { - this.clearCache(); + this.clearCaches(); if (this.solr != null) this.solr.clear(); } @@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo @Override public void deleteByQuery(final String querystring) throws IOException { - this.clearCache(); + this.clearCaches(); this.solr.deleteByQuery(querystring); } @@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo } private void addToCache(SolrDocumentList list, boolean doccache) { - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); for (final SolrDocument solrdoc: list) { addToCache(solrdoc, doccache); } diff --git a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java index 792d921ad..ddbf550ec 100644 --- a/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/ConcurrentUpdateSolrConnector.java @@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector { ensureAliveUpdateHandler(); } + @Override + public void clearCaches() { + this.connector.clearCaches(); + this.idCache.clear(); + } + 
/** * used for debugging */ diff --git a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java index 533ecb080..10d36a9c9 100644 --- a/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/EmbeddedSolrConnector.java @@ -34,6 +34,7 @@ import net.yacy.search.schema.CollectionSchema; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; @@ -47,10 +48,14 @@ import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.SearchHandler; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.request.UnInvertedField; import org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.QueryResultKey; +import org.apache.solr.search.SolrCache; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.util.RefCounted; @@ -88,6 +93,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo super.init(this.instance.getServer(coreName)); } + public void clearCaches() { + SolrConfig solrConfig = this.core.getSolrConfig(); + @SuppressWarnings("unchecked") + SolrCache fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance(); + if (fieldValueCache != null) fieldValueCache.clear(); + @SuppressWarnings("unchecked") + SolrCache filterCache= solrConfig.filterCacheConfig == null ? 
null : solrConfig.filterCacheConfig.newInstance(); + if (filterCache != null) filterCache.clear(); + @SuppressWarnings("unchecked") + SolrCache queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance(); + if (queryResultCache != null) queryResultCache.clear(); + @SuppressWarnings("unchecked") + SolrCache documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance(); + if (documentCache != null) documentCache.clear(); + } + public SolrInstance getInstance() { return this.instance; } diff --git a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java index c6d51e8ec..19fa604c5 100644 --- a/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/MirrorSolrConnector.java @@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo this.solr0 = solr0; this.solr1 = solr1; } + + @Override + public void clearCaches() { + if (this.solr0 != null) this.solr0.clearCaches(); + if (this.solr1 != null) this.solr1.clearCaches(); + } public boolean isConnected0() { return this.solr0 != null; diff --git a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java index 4e2a9369f..0ab5f8b31 100644 --- a/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/RemoteSolrConnector.java @@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn super.close(); } + @Override + public void clearCaches() { + // we do not have a direct access to the caches here, thus we simply do nothing. 
+ } + @Override public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException { // during the solr query we set the thread name to the query string to get more debugging info in thread dumps @@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn } System.exit(0); } + } diff --git a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java index 8fb31c531..f28d26f09 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrConnector.java @@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; public interface SolrConnector extends Iterable /* Iterable of document IDs */ { - + + /** + * clear all caches: inside solr and ouside solr within the implementations of this interface + */ + public void clearCaches(); + /** * get the size of the index * @return number of results if solr is queries with a catch-all pattern diff --git a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java index f12d43950..aec6352f0 100644 --- a/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java +++ b/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java @@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen public SolrServer getServer() { return this.server; } - + @Override public void commit(final boolean softCommit) { synchronized (this.server) { diff --git a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java index 6b9b7a939..1d49fd537 100644 --- a/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java +++ 
b/source/net/yacy/cora/federate/solr/instance/InstanceMirror.java @@ -24,7 +24,6 @@ import java.util.Collection; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.federate.solr.connector.CachedSolrConnector; import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.MirrorSolrConnector; @@ -161,9 +160,9 @@ public class InstanceMirror { return msc; } - public void clearCache() { + public void clearCaches() { for (SolrConnector csc: this.connectorCache.values()) { - if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache(); + csc.clearCaches(); } for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true); } diff --git a/source/net/yacy/search/ResourceObserver.java b/source/net/yacy/search/ResourceObserver.java index 9cc6a58e7..32e8d2396 100644 --- a/source/net/yacy/search/ResourceObserver.java +++ b/source/net/yacy/search/ResourceObserver.java @@ -129,7 +129,7 @@ public class ResourceObserver { if(MemoryControl.properState()) return Space.HIGH; // clear some caches - @all: are there more of these, we could clear here? 
- this.sb.index.clearCache(); + this.sb.index.clearCaches(); SearchEventCache.cleanupEvents(true); this.sb.trail.clear(); Switchboard.urlBlacklist.clearblacklistCache(); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 1cfe10298..0307b7e01 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch { // clear caches if necessary if ( !MemoryControl.request(128000000L, false) ) { - this.index.clearCache(); + this.index.clearCaches(); SearchEventCache.cleanupEvents(false); this.trail.clear(); GuiHandler.clear(); diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 39ed4d89a..ea8a2bac5 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -225,10 +225,10 @@ public final class Fulltext { } } - public void clearCache() { + public void clearCaches() { if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache(); if (this.statsDump != null) this.statsDump.clear(); - this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); this.statsDump = null; } @@ -250,7 +250,7 @@ public final class Fulltext { for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear(); } this.commit(false); - this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); } } @@ -260,7 +260,7 @@ public final class Fulltext { if (instance != null) { for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear(); } - this.solrInstances.clearCache(); + this.solrInstances.clearCaches(); } } @@ -400,7 +400,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } public 
void putEdges(final Collection edges) throws IOException { @@ -412,7 +412,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } /** @@ -432,7 +432,7 @@ public final class Fulltext { throw new IOException(e.getMessage(), e); } this.statsDump = null; - if (MemoryControl.shortStatus()) clearCache(); + if (MemoryControl.shortStatus()) clearCaches(); } /** diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index df479736b..617d5269c 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -503,10 +503,10 @@ public class Segment { } } - public void clearCache() { + public void clearCaches() { if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache(); if (this.termIndex != null) this.termIndex.clearCache(); - this.fulltext.clearCache(); + this.fulltext.clearCaches(); } public File getLocation() {