From c48374d14adaacc656ea49d3ce578672a9d8e536 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 22 Dec 2006 12:54:56 +0000 Subject: [PATCH] new memory limit computation for indexing queue shall better prevent outofmemory errors git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3118 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/index/indexCachedRI.java | 4 ++++ source/de/anomic/index/indexCollectionRI.java | 6 ++++++ source/de/anomic/index/indexRAMRI.java | 10 +++------- source/de/anomic/index/indexRI.java | 1 + .../de/anomic/kelondro/kelondroCollectionIndex.java | 13 +++++++++++++ source/de/anomic/kelondro/kelondroRowSet.java | 1 + source/de/anomic/plasma/plasmaSwitchboard.java | 6 ++++++ source/de/anomic/plasma/plasmaWordIndex.java | 4 ++++ .../anomic/plasma/plasmaWordIndexFileCluster.java | 4 ++++ yacy.init | 8 ++++---- 10 files changed, 46 insertions(+), 11 deletions(-) diff --git a/source/de/anomic/index/indexCachedRI.java b/source/de/anomic/index/indexCachedRI.java index ecf6eff23..21763a432 100644 --- a/source/de/anomic/index/indexCachedRI.java +++ b/source/de/anomic/index/indexCachedRI.java @@ -62,6 +62,10 @@ public class indexCachedRI implements indexRI { return payloadrow; } + public int minMem() { + return 1024 * 1024; + } + public void setWordFlushDivisor(int idleDivisor, int busyDivisor) { this.idleDivisor = idleDivisor; this.busyDivisor = busyDivisor; diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index 41dadcf42..af302501c 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -83,6 +83,12 @@ public class indexCollectionRI implements indexRI { } } + public int minMem() { + // calculate a minimum amount of memory that is necessary to use the index + // during runtime (after the object was initialized) + return collectionIndex.minMem(); + } + public synchronized Iterator wordContainers(String startWordHash, boolean 
rot) { return new wordContainersIterator(startWordHash, rot); } diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java index 3f3749823..710719882 100644 --- a/source/de/anomic/index/indexRAMRI.java +++ b/source/de/anomic/index/indexRAMRI.java @@ -62,13 +62,6 @@ public final class indexRAMRI implements indexRI { private kelondroRow payloadrow; private kelondroRow bufferStructureBasis; - // calculated constants - private static String maxKey; - static { - maxKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z'; - //minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-'; - } - public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) { // creates a new index cache @@ -98,6 +91,9 @@ public final class indexRAMRI implements indexRI { } } + public int minMem() { + return 1024*1024; + } public synchronized long getUpdateTime(String wordHash) { indexContainer entries = getContainer(wordHash, null, -1); diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java index 733f852ba..16e9097bc 100644 --- a/source/de/anomic/index/indexRI.java +++ b/source/de/anomic/index/indexRI.java @@ -34,6 +34,7 @@ import java.util.Set; public interface indexRI { public int size(); + public int minMem(); public Iterator wordContainers(String startWordHash, boolean rot); // method to replace wordHashes diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 3ca770532..26400c6b6 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -251,6 +251,19 @@ public class kelondroCollectionIndex { return index.size(); } + public int minMem() { + // calculate a minimum amount of memory that is necessary to use the collection + // during runtime (after the index was initialized) 
+ + // calculate an upper limit (not the correct size) of the maximum number of indexes for a wordHash + // this is computed by the size of the biggest used collection + int m = 1; + for (int i = 0; i < arrays.size(); i++) m = m * this.loadfactor; + + // this must be multiplied with the payload size + // and doubled for necessary memory transformation during sort operation + return 2 * m * this.payloadrow.objectsize; + } public synchronized void put(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException { // this replaces an old collection by a new one diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 978ee514e..1d826694b 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -228,6 +228,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd (newColumn != rowdef.primaryKey)) { resolveMarkedRemoved(); rowdef.setOrdering(newOrder, newColumn); + assert (removeMarker.size() == 0); this.sortBound = 0; } } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 88090e566..f3ab267da 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -444,6 +444,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int wordInCacheMaxCount = (int) getConfigLong("indexDistribution.dhtReceiptLimit", 1000); wordIndex.setInMaxWordCount(wordInCacheMaxCount); + // set a minimum amount of memory for the indexer thread + setConfig("80_indexing_memprereq", Math.max(getConfigLong("80_indexing_memprereq", 0), wordIndex.minMem())); + // start a cache manager log.logConfig("Starting HT Cache Manager"); @@ -1184,6 +1187,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (yacyCore.newsPool.automaticProcess() > 0) 
hasDoneSomething = true; } catch (IOException e) {} + // set new memory limit for indexer thread + setConfig("80_indexing_memprereq", Math.max(getConfigLong("80_indexing_memprereq", 0), wordIndex.minMem())); + return hasDoneSomething; } catch (InterruptedException e) { this.log.logInfo("cleanupJob: Shutdown detected"); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index e936781f3..93996a849 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -81,6 +81,10 @@ public final class plasmaWordIndex implements indexRI { this.idleDivisor = 420; } + public int minMem() { + return dhtOutCache.minMem() + dhtInCache.minMem() + collections.minMem(); + } + public int maxURLinDHTOutCache() { return dhtOutCache.maxURLinCache(); } diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java index c2b7aca0a..7d229fe17 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java @@ -279,4 +279,8 @@ public class plasmaWordIndexFileCluster implements indexRI { throw new UnsupportedOperationException(); } + public int minMem() { + return 1024*1024; + } + } diff --git a/yacy.init b/yacy.init index d964ea011..bb5b99532 100644 --- a/yacy.init +++ b/yacy.init @@ -495,7 +495,7 @@ xpstopw=true 70_cachemanager_busysleep=0 70_cachemanager_memprereq=1048576 80_indexing_idlesleep=2000 -80_indexing_busysleep=100 +80_indexing_busysleep=200 80_indexing_memprereq=2097152 82_crawlstack_idlesleep=5000 82_crawlstack_busysleep=0 @@ -572,10 +572,10 @@ ramCacheProfiles_time= 500 # not for first startup of YaCy # -Xmx set maximum Java heap size -javastart_Xmx=Xmx64m +javastart_Xmx=Xmx96m # -Xms set initial Java heap size -javastart_Xms=Xms10m +javastart_Xms=Xms96m # performance properties for the word index cache # wordCacheMaxLow/High is the number of word 
indexes that shall be held in the @@ -587,7 +587,7 @@ javastart_Xms=Xms10m # may last for the word flush wordCacheMaxCount = 20000 wordCacheInitCount = 30000 -wordFlushIdleDivisor = 420; +wordFlushIdleDivisor = 500; wordFlushBusyDivisor = 5000; # Specifies if yacy can be used as transparent http proxy.