diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html
index 77e9b44b2..0878438d3 100644
--- a/htroot/PerformanceQueues_p.html
+++ b/htroot/PerformanceQueues_p.html
@@ -87,14 +87,19 @@ Changes take effect immediately
-
Maximum number of Word Caches:
-
-
+
Maximum number of Word Caches, low limit:
+
+
This is the number of word indexes that shall be held in the
ram cache during indexing. When YaCy is shut down, this cache must be
- flushed to disc; this may last some minutes.
+ flushed to disc; this may last some minutes. The low limit applies to crawling tasks; the high limit applies
+ to search and DHT transmission tasks.
+
+
Maximum number of Word Caches, high limit:
+
+
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index c0e954836..9dec459be 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -171,9 +171,11 @@ public class PerformanceQueues_p {
prop.put("table", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
- int wordCacheMax = Integer.parseInt((String) post.get("wordCacheMax", "10000"));
- switchboard.setConfig("wordCacheMax", Integer.toString(wordCacheMax));
- switchboard.wordIndex.setMaxWords(wordCacheMax);
+ int wordCacheMaxLow = Integer.parseInt((String) post.get("wordCacheMaxLow", "8000"));
+ int wordCacheMaxHigh = Integer.parseInt((String) post.get("wordCacheMaxHigh", "10000"));
+ switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow));
+ switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh));
+ switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
int maxWaitingWordFlush = Integer.parseInt((String) post.get("maxWaitingWordFlush", "180"));
switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush));
}
@@ -232,7 +234,8 @@ public class PerformanceQueues_p {
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
- prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000"));
+ prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
+ prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);
diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java
index a9f5e2350..782b50518 100644
--- a/htroot/yacy/transferRWI.java
+++ b/htroot/yacy/transferRWI.java
@@ -120,7 +120,7 @@ public final class transferRWI {
wordHash = estring.substring(0, p);
wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p));
- sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry));
+ sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true);
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
diff --git a/source/de/anomic/plasma/plasmaSearch.java b/source/de/anomic/plasma/plasmaSearch.java
index 76edf0c16..19469cf6f 100644
--- a/source/de/anomic/plasma/plasmaSearch.java
+++ b/source/de/anomic/plasma/plasmaSearch.java
@@ -79,7 +79,7 @@ public final class plasmaSearch {
}
public void addWords(plasmaWordIndexEntryContainer container) {
- wordIndex.addEntries(container);
+ wordIndex.addEntries(container, true);
}
public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser,
@@ -108,7 +108,7 @@ public final class plasmaSearch {
wordHash = plasmaWordIndexEntry.word2hash(word);
entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0,
age, quality, language, doctype, true);
- this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry));
+ this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
//System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries");
return condenser.getWords().size();
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index d13190cda..aed15375f 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -306,8 +306,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
- int wordCacheMax = Integer.parseInt((String) getConfig("wordCacheMax", "10000"));
- wordIndex.setMaxWords(wordCacheMax);
+ int wordCacheMaxLow = Integer.parseInt((String) getConfig("wordCacheMaxLow", "8000"));
+ int wordCacheMaxHigh = Integer.parseInt((String) getConfig("wordCacheMaxHigh", "10000"));
+ wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
searchManager = new plasmaSearch(urlPool.loadedURL, wordIndex);
// start a cache manager
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 6884f22de..e756a2a16 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -93,12 +93,12 @@ public final class plasmaWordIndex {
return ramCache.assortmentsCacheFillStatusCml();
}
- public void setMaxWords(int maxWords) {
- ramCache.setMaxWords(maxWords);
+ public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
+ ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
}
- public int addEntries(plasmaWordIndexEntryContainer entries) {
- return ramCache.addEntries(entries, System.currentTimeMillis());
+ public int addEntries(plasmaWordIndexEntryContainer entries, boolean highPriority) {
+ return ramCache.addEntries(entries, System.currentTimeMillis(), highPriority);
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index 86482df7f..b31564ae8 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -74,8 +74,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
private final TreeMap cache;
private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate;
- private long startTime;
- private int maxWords;
+ private long startTime;
+ private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities
private final serverLog log;
private final plasmaWordIndexAssortmentCluster assortmentCluster;
private int assortmentBufferSize; //kb
@@ -115,7 +115,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis();
- this.maxWords = 10000;
+ this.maxWordsLow = 8000;
+ this.maxWordsHigh = 10000;
this.backend = backend;
this.log = log;
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log);
@@ -254,8 +255,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return cache.size();
}
- public void setMaxWords(int maxWords) {
- this.maxWords = maxWords;
+ public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
+ this.maxWordsLow = maxWordsLow;
+ this.maxWordsHigh = maxWordsHigh;
}
public int[] assortmentsSizes() {
@@ -319,7 +321,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
flushFromMem();
try {
- pausetime = 1 + java.lang.Math.min(1000, 5 * maxWords/(cache.size() + 1));
+ pausetime = 1 + java.lang.Math.min(1000, 5 * maxWordsHigh/(cache.size() + 1));
if (cache.size() == 0) pausetime = 2000;
this.sleep(pausetime);
} catch (InterruptedException e) {}
@@ -395,11 +397,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return container.size();
} else {
// *** should care about another option here ***
- return backend.addEntries(feedback, time);
+ return backend.addEntries(feedback, time, true);
}
} else {
// store to back-end; this should be a rare case
- return backend.addEntries(container, time);
+ return backend.addEntries(container, time, true);
}
}
@@ -420,7 +422,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
// we have a non-empty entry-container
// integrate it to the backend
- return backend.addEntries(container, container.updated()) > 0;
+ return backend.addEntries(container, container.updated(), true) > 0;
}
}
@@ -464,17 +466,22 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return removed;
}
- public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) {
+ public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
// this puts the entries into the cache, not into the assortment directly
// check cache space
if (cache.size() > 0) try {
// pause to get space in the cache (while it is flushed)
- if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000));
+ long pausetime;
+ if (highPriority) {
+ if (cache.size() + 1000 >= this.maxWordsHigh) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsHigh + 1000));
+ pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsHigh + 1));
+ } else {
+ if (cache.size() + 1000 >= this.maxWordsLow) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsLow + 1000));
+ pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsLow + 1));
+ }
// slow down if we reach cache limit
- long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1));
- //System.out.println("Pausetime=" + pausetime);
Thread.sleep(pausetime);
} catch (InterruptedException e) {}
diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java b/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java
index 0267ca34e..60a06c9ad 100644
--- a/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java
+++ b/source/de/anomic/plasma/plasmaWordIndexClassicCacheMigration.java
@@ -166,7 +166,7 @@ public class plasmaWordIndexClassicCacheMigration {
new String(row[j + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort));
}
container.add(entries, System.currentTimeMillis());
- fresh.addEntries(container);
+ fresh.addEntries(container, false);
i = null;
remove(hash);
return true;
diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
index 7f0d96aea..033d3b3c3 100644
--- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
+++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
@@ -226,7 +226,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
}
}
- public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
+ public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
if ((container == null) || (container.size() == 0)) return 0;
diff --git a/source/de/anomic/plasma/plasmaWordIndexInterface.java b/source/de/anomic/plasma/plasmaWordIndexInterface.java
index 0187bb877..86ea3a854 100644
--- a/source/de/anomic/plasma/plasmaWordIndexInterface.java
+++ b/source/de/anomic/plasma/plasmaWordIndexInterface.java
@@ -55,7 +55,7 @@ public interface plasmaWordIndexInterface {
public void deleteIndex(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
- public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime);
+ public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
public void close(int waitingSeconds);
diff --git a/yacy.init b/yacy.init
index 20a11c478..c19514a2c 100644
--- a/yacy.init
+++ b/yacy.init
@@ -492,12 +492,15 @@ javastart_Xms=Xms10m
# performance properties for the word index cache
-# wordCacheMax is the number of word indexes that shall be held in the
+# wordCacheMaxLow and wordCacheMaxHigh limit the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes.
+# The low value applies to crawling tasks; the high value applies to
+# remote index transmissions and search requests.
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
-wordCacheMax = 10000
+wordCacheMaxLow = 12000
+wordCacheMaxHigh = 16000
maxWaitingWordFlush = 180
# Specifies if yacy can be used as transparent http proxy.