diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index 77e9b44b2..0878438d3 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -87,14 +87,19 @@ Changes take effect immediately - Maximum number of Word Caches: - - + Maximum number of Word Caches, low limit: + + This is is the number of word indexes that shall be held in the ram cache during indexing. When YaCy is shut down, this cache must be - flushed to disc; this may last some minutes. + flushed to disc; this may last some minutes. The low limit is valid for crawling tasks, the high limit is valid + for search and DHT transmission tasks. + + Maximum number of Word Caches, high limit: + + diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index c0e954836..9dec459be 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -171,9 +171,11 @@ public class PerformanceQueues_p { prop.put("table", c); if ((post != null) && (post.containsKey("cacheSizeSubmit"))) { - int wordCacheMax = Integer.parseInt((String) post.get("wordCacheMax", "10000")); - switchboard.setConfig("wordCacheMax", Integer.toString(wordCacheMax)); - switchboard.wordIndex.setMaxWords(wordCacheMax); + int wordCacheMaxLow = Integer.parseInt((String) post.get("wordCacheMaxLow", "8000")); + int wordCacheMaxHigh = Integer.parseInt((String) post.get("wordCacheMaxHigh", "10000")); + switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow)); + switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh)); + switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh); int maxWaitingWordFlush = Integer.parseInt((String) post.get("maxWaitingWordFlush", "180")); switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush)); } @@ -232,7 +234,8 @@ public class PerformanceQueues_p { prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize()); prop.put("maxURLinWordCache", 
"" + switchboard.wordIndex.maxURLinWordCache()); prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180")); - prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000")); + prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "8000")); /* fallback 8000 matches the low-limit default used by the cacheSizeSubmit handler and at switchboard start-up */ + prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000")); prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000")); prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess); diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index a9f5e2350..782b50518 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -120,7 +120,7 @@ public final class transferRWI { wordHash = estring.substring(0, p); wordhashes[i] = wordHash; entry = new plasmaWordIndexEntry(estring.substring(p)); - sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry)); + sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true); urlHash = entry.getUrlHash(); if ((!(unknownURL.contains(urlHash))) && (!(sb.urlPool.loadedURL.exists(urlHash)))) { diff --git a/source/de/anomic/plasma/plasmaSearch.java b/source/de/anomic/plasma/plasmaSearch.java index 76edf0c16..19469cf6f 100644 --- a/source/de/anomic/plasma/plasmaSearch.java +++ b/source/de/anomic/plasma/plasmaSearch.java @@ -79,7 +79,7 @@ public final class plasmaSearch { } public void addWords(plasmaWordIndexEntryContainer container) { - wordIndex.addEntries(container); + wordIndex.addEntries(container, true); } public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser, @@ -108,7 +108,7 @@ public final class plasmaSearch { wordHash = plasmaWordIndexEntry.word2hash(word); entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0, age, quality, language, doctype, true); - 
this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry)); + this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false); } //System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries"); return condenser.getWords().size(); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d13190cda..aed15375f 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -306,8 +306,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL); wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log); - int wordCacheMax = Integer.parseInt((String) getConfig("wordCacheMax", "10000")); - wordIndex.setMaxWords(wordCacheMax); + int wordCacheMaxLow = Integer.parseInt((String) getConfig("wordCacheMaxLow", "8000")); + int wordCacheMaxHigh = Integer.parseInt((String) getConfig("wordCacheMaxHigh", "10000")); + wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh); searchManager = new plasmaSearch(urlPool.loadedURL, wordIndex); // start a cache manager diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 6884f22de..e756a2a16 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -93,12 +93,12 @@ public final class plasmaWordIndex { return ramCache.assortmentsCacheFillStatusCml(); } - public void setMaxWords(int maxWords) { - ramCache.setMaxWords(maxWords); + public void setMaxWords(int maxWordsLow, int maxWordsHigh) { + ramCache.setMaxWords(maxWordsLow, maxWordsHigh); } - public int addEntries(plasmaWordIndexEntryContainer entries) { - return ramCache.addEntries(entries, 
System.currentTimeMillis()); + public int addEntries(plasmaWordIndexEntryContainer entries, boolean highPriority) { + return ramCache.addEntries(entries, System.currentTimeMillis(), highPriority); } public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) { diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index 86482df7f..b31564ae8 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -74,8 +74,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { private final TreeMap cache; private final kelondroMScoreCluster hashScore; private final kelondroMScoreCluster hashDate; - private long startTime; - private int maxWords; + private long startTime; + private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities private final serverLog log; private final plasmaWordIndexAssortmentCluster assortmentCluster; private int assortmentBufferSize; //kb @@ -115,7 +115,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { this.hashScore = new kelondroMScoreCluster(); this.hashDate = new kelondroMScoreCluster(); this.startTime = System.currentTimeMillis(); - this.maxWords = 10000; + this.maxWordsLow = 8000; + this.maxWordsHigh = 10000; this.backend = backend; this.log = log; this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log); @@ -254,8 +255,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { return cache.size(); } - public void setMaxWords(int maxWords) { - this.maxWords = maxWords; + public void setMaxWords(int maxWordsLow, int maxWordsHigh) { + this.maxWordsLow = maxWordsLow; + this.maxWordsHigh = maxWordsHigh; } public int[] assortmentsSizes() { @@ -319,7 +321,7 @@ public final class plasmaWordIndexCache implements 
plasmaWordIndexInterface { } else { flushFromMem(); try { - pausetime = 1 + java.lang.Math.min(1000, 5 * maxWords/(cache.size() + 1)); + pausetime = 1 + java.lang.Math.min(1000, 5 * maxWordsHigh/(cache.size() + 1)); if (cache.size() == 0) pausetime = 2000; this.sleep(pausetime); } catch (InterruptedException e) {} @@ -395,11 +397,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { return container.size(); } else { // *** should care about another option here *** - return backend.addEntries(feedback, time); + return backend.addEntries(feedback, time, true); } } else { // store to back-end; this should be a rare case - return backend.addEntries(container, time); + return backend.addEntries(container, time, true); } } @@ -420,7 +422,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } else { // we have a non-empty entry-container // integrate it to the backend - return backend.addEntries(container, container.updated()) > 0; + return backend.addEntries(container, container.updated(), true) > 0; } } @@ -464,17 +466,22 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { return removed; } - public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) { + public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) { // this puts the entries into the cache, not into the assortment directly // check cache space if (cache.size() > 0) try { // pause to get space in the cache (while it is flushed) - if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000)); + long pausetime; + if (highPriority) { + if (cache.size() + 1000 >= this.maxWordsHigh) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsHigh + 1000)); + pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsHigh + 1)); + } else { + if (cache.size() + 1000 >= 
this.maxWordsLow) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsLow + 1000)); + pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsLow + 1)); + } // slow down if we reach cache limit - long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1)); - //System.out.println("Pausetime=" + pausetime); Thread.sleep(pausetime); } catch (InterruptedException e) {} diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java b/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java index 0267ca34e..60a06c9ad 100644 --- a/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java +++ b/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java @@ -166,7 +166,7 @@ public class plasmaWordIndexClassicCacheMigration { new String(row[j + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort)); } container.add(entries, System.currentTimeMillis()); - fresh.addEntries(container); + fresh.addEntries(container, false); i = null; remove(hash); return true; diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java index 7f0d96aea..033d3b3c3 100644 --- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java +++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java @@ -226,7 +226,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface { } } - public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) { + public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) { //System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug // fetch the index cache if ((container == null) || (container.size() == 0)) return 0; diff --git a/source/de/anomic/plasma/plasmaWordIndexInterface.java b/source/de/anomic/plasma/plasmaWordIndexInterface.java index 0187bb877..86ea3a854 100644 --- 
a/source/de/anomic/plasma/plasmaWordIndexInterface.java +++ b/source/de/anomic/plasma/plasmaWordIndexInterface.java @@ -55,7 +55,7 @@ public interface plasmaWordIndexInterface { public void deleteIndex(String wordHash); public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete); - public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime); + public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority); public void close(int waitingSeconds); diff --git a/yacy.init b/yacy.init index 20a11c478..c19514a2c 100644 --- a/yacy.init +++ b/yacy.init @@ -492,12 +492,15 @@ javastart_Xms=Xms10m # performance properties for the word index cache -# wordCacheMax is the number of word indexes that shall be held in the +# wordCacheMaxLow/High are the low/high limits for the number of word indexes that shall be held in the # ram cache during indexing. When YaCy is shut down, this cache must be # flushed to disc; this may last some minutes. +# The low limit applies to crawling tasks, the high limit applies to +# remote index transmissions and search requests. # maxWaitingWordFlush gives the number of seconds that the shutdown # may last for the word flush -wordCacheMax = 10000 +wordCacheMaxLow = 12000 +wordCacheMaxHigh = 16000 maxWaitingWordFlush = 180 # Specifies if yacy can be used as transparent http proxy.