+
- Words in RAM Cache: |
- #[wordCacheRAMSize]# |
+ Words in RAM cache: |
+ #[wordCacheWSize]# |
+ #[wordCacheKSize]# |
- This is the current size of the word cache.
- The smaller this number, the faster the shut-down procedure will be.
- The maximum of this cache can be set below.
+ This is the current size of the word caches.
+ The indexing cache speeds up the indexing process; the DHT cache temporarily holds received indexes until they are accepted.
+ The maximum size of these caches can be set below.
|
Maximum URLs currently assigned to one cached word: |
- #[maxURLinWordCache]# |
+ #[maxURLinWCache]# |
+ not controlled for the DHT cache |
This is the maximum size of URLs assigned to a single word cache entry.
If this is a big number, it shows that the caching works efficiently.
|
- Maximum Age of Word in cache: |
- #[maxAgeOfWordCache]# |
+ Maximum age of a word: |
+ #[maxAgeOfWCache]# |
+ #[maxAgeOfKCache]# |
- This is the maximum age of a word index that is in the RAM cache in minutes.
+ This is the maximum age, in minutes, of a word in the respective cache.
|
- Minimum Age of Word in cache: |
- #[minAgeOfWordCache]# |
+ Minimum age of a word: |
+ #[minAgeOfWCache]# |
+ #[minAgeOfKCache]# |
- This is the minimum age of a word index that is in the RAM cache in minutes.
+ This is the minimum age, in minutes, of a word in the respective cache.
|
- Maximum number of Word Caches, low limit: |
- |
+ Maximum number of words in cache: |
+ |
+ cannot be set for the DHT cache |
This is the number of word indexes that shall be held in the
ram cache during indexing. When YaCy is shut down, this cache must be
- flushed to disc; this may last some minutes. The low limit is valid for crawling tasks, the high limit is valid
- for search and DHT transmission tasks.
+ flushed to disc; this may take several minutes.
|
-
- Maximum number of Word Caches, high limit: |
- |
-
-
+ |
Changes take effect immediately |
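
For orientation before the code changes below, here is a minimal, self-contained sketch of the two-cache layout the template text above describes, using the wCache/kCache names introduced in plasmaWordIndexCache further down. The word container is simplified to a list of URL hashes and one entry per kCache key, and only the 2-minute kCache age is taken from the patch; everything else is illustration, not YaCy code.

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    // Sketch of the dual word cache: wCache (indexing) and kCache (DHT holding area).
    public final class DualWordCacheSketch {
        private static final long K_CACHE_MAX_AGE = 2 * 60 * 1000L; // 2 minutes, as in the patch

        // wCache: word hash -> URL hashes; drives indexing and flushing
        private final TreeMap<String, List<String>> wCache = new TreeMap<>();
        // kCache: arrival time -> {word hash, URL hash}; parks DHT-received entries
        private final TreeMap<Long, String[]> kCache = new TreeMap<>();
        private long kCacheInc = 0;

        public void addEntry(String wordHash, String urlHash, long updateTime, boolean dhtCase) {
            if (dhtCase) {
                // DHT case: key by arrival time; the rolling increment keeps keys unique
                // when several entries arrive within the same millisecond
                kCache.put(updateTime + kCacheInc, new String[] { wordHash, urlHash });
                kCacheInc = (kCacheInc + 1) % 10000;
            } else {
                wCache.computeIfAbsent(wordHash, k -> new ArrayList<>()).add(urlHash);
            }
        }

        public void shiftK2W() {
            // move entries that have waited long enough from the kCache into the wCache
            Iterator<Map.Entry<Long, String[]>> it = kCache.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<Long, String[]> e = it.next();
                if (System.currentTimeMillis() - e.getKey() < K_CACHE_MAX_AGE) return; // keys are time-ordered
                addEntry(e.getValue()[0], e.getValue()[1], e.getKey(), false);
                it.remove();
            }
        }

        public int wSize() { return wCache.size(); }
        public int kSize() { return kCache.size(); }

        public static void main(String[] args) {
            DualWordCacheSketch c = new DualWordCacheSketch();
            c.addEntry("wordHashA", "urlHash1", System.currentTimeMillis(), false); // indexing path
            c.addEntry("wordHashB", "urlHash2", System.currentTimeMillis(), true);  // DHT path
            c.shiftK2W(); // the kCache entry is still fresh, so it stays put
            System.out.println("wSize=" + c.wSize() + " kSize=" + c.kSize());
        }
    }
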
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index de43f806d..b2e8d1f34 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -142,15 +142,14 @@ public class PerformanceQueues_p {
idlesleep = Long.parseLong(d((String) defaultSettings.get(threadName + "_idlesleep"), "1000"));
busysleep = Long.parseLong(d((String) defaultSettings.get(threadName + "_busysleep"), "100"));
memprereq = Long.parseLong(d((String) defaultSettings.get(threadName + "_memprereq"), "0"));
-
+
// check values to prevent short-cut loops
if (idlesleep < 1000) idlesleep = 1000;
if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; }
if ((threadName.equals("50_localcrawl")) && (busysleep < 100)) busysleep = 100;
if ((threadName.equals("61_globalcrawltrigger")) && (busysleep < 100)) busysleep = 100;
if ((threadName.equals("62_remotetriggeredcrawl")) && (busysleep < 100)) busysleep = 100;
-
-
+
// on-the-fly re-configuration
switchboard.setThreadPerformance(threadName, idlesleep, busysleep, memprereq);
switchboard.setConfig(threadName + "_idlesleep", idlesleep);
@@ -171,12 +170,9 @@ public class PerformanceQueues_p {
prop.put("table", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
- int wordCacheMaxLow = post.getInt("wordCacheMaxLow", 8000);
- int wordCacheMaxHigh = post.getInt("wordCacheMaxHigh", 10000);
- if (wordCacheMaxLow > wordCacheMaxHigh) wordCacheMaxLow = wordCacheMaxHigh;
- switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow));
- switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh));
- switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
+ int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 10000);
+ switchboard.setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount));
+ switchboard.wordIndex.setMaxWordCount(wordCacheMaxCount);
int maxWaitingWordFlush = post.getInt("maxWaitingWordFlush", 180);
switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush));
}
@@ -251,13 +247,15 @@ public class PerformanceQueues_p {
}
// table cache settings
- prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
- prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
- prop.put("maxAgeOfWordCache", "" + (switchboard.wordIndex.maxAgeOfWordCache() / 1000 / 60)); // minutes
- prop.put("minAgeOfWordCache", "" + (switchboard.wordIndex.minAgeOfWordCache() / 1000 / 60)); // minutes
+ prop.put("wordCacheWSize", switchboard.wordIndex.wSize());
+ prop.put("wordCacheKSize", switchboard.wordIndex.kSize());
+ prop.put("maxURLinWCache", "" + switchboard.wordIndex.maxURLinWCache());
+ prop.put("maxAgeOfWCache", "" + (switchboard.wordIndex.maxAgeOfWCache() / 1000 / 60)); // minutes
+ prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfWCache() / 1000 / 60)); // minutes
+ prop.put("maxAgeOfKCache", "" + (switchboard.wordIndex.maxAgeOfKCache() / 1000 / 60)); // minutes
+ prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfKCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
- prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
- prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
+ prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000"));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);
diff --git a/htroot/xml/status_p.java b/htroot/xml/status_p.java
index af9bc3d9e..3cd8c35e5 100644
--- a/htroot/xml/status_p.java
+++ b/htroot/xml/status_p.java
@@ -64,7 +64,7 @@ public class status_p {
prop.put("rejected", 0);
yacyCore.peerActions.updateMySeed();
prop.put("ppm", yacyCore.seedDB.mySeed.get(yacySeed.ISPEED, "unknown"));
- prop.put("wordCacheSize", switchboard.wordIndex.wordCacheRAMSize());
+ prop.put("wordCacheSize", switchboard.wordIndex.wSize() + switchboard.wordIndex.kSize());
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java
index 7084616a9..85e711d95 100644
--- a/source/de/anomic/kelondro/kelondroCollectionIndex.java
+++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java
@@ -27,7 +27,6 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
-import java.util.Iterator;
public class kelondroCollectionIndex {
diff --git a/source/de/anomic/kelondro/kelondroMScoreCluster.java b/source/de/anomic/kelondro/kelondroMScoreCluster.java
index e67d61e89..d32c92dfc 100644
--- a/source/de/anomic/kelondro/kelondroMScoreCluster.java
+++ b/source/de/anomic/kelondro/kelondroMScoreCluster.java
@@ -73,17 +73,9 @@ public final class kelondroMScoreCluster {
} catch (ParseException e) {}
}
- /*
- public static int string2score(String s) {
- int i = string2scoreX(s);
- System.out.println("string2core(" + s + ") = " + i);
- return i;
- }
- */
-
public static int string2score(String s) {
// this can be used to calculate a score from a string
-
+ if ((s == null) || (s.length() == 0) || (s.charAt(0) == '-')) return 0;
try {
long l = 0;
if (s.length() == shortDateFormatString.length()) {
@@ -97,7 +89,10 @@ public final class kelondroMScoreCluster {
}
// fix out-of-ranges
if (l > Integer.MAX_VALUE) return Integer.MAX_VALUE;
- if (l < 0) return 0;
+ if (l < 0) {
+ System.out.println("string2score: negative score for input " + s);
+ return 0;
+ }
return (int) l;
} catch (Exception e) {
// try it lex
@@ -110,7 +105,10 @@ public final class kelondroMScoreCluster {
}
for (int i = len; i < 5; i++) c <<= 6;
if (c > Integer.MAX_VALUE) return Integer.MAX_VALUE;
- if (c < 0) return 0;
+ if (c < 0) {
+ System.out.println("string2score: negative score for input " + s);
+ return 0;
+ }
return c;
}
}
@@ -411,14 +409,18 @@ public final class kelondroMScoreCluster {
public static void main(String[] args) {
- if (args.length > 0) System.out.println("score of " + args[0] + ": " + string2score(args[0]));
- //System.exit(0);
+ String t = "ZZZZZZZZZZ";
+ System.out.println("score of " + t + ": " + string2score(t));
+ if (args.length > 0) {
+ System.out.println("score of " + args[0] + ": " + string2score(args[0]));
+ System.exit(0);
+ }
System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster();
- long c = 0;
+ long c = 0;
- // create cluster
+ // create cluster
long time = System.currentTimeMillis();
Random random = new Random(1234);
int r;
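
As a rough guide to what string2score does with non-numeric input, the following stand-alone sketch packs the first five characters of a string into a positive int, six bits per character. The exact per-character table is not visible in the hunk above, so the bit mask below is an assumption for illustration; only the truncation to five characters, the left-shift padding, the leading '-' guard and the clamping mirror the patched code.

    public final class LexScoreSketch {
        // Assumed approximation of the lexical branch of kelondroMScoreCluster.string2score.
        static int lexScore(String s) {
            if (s == null || s.length() == 0 || s.charAt(0) == '-') return 0; // guard added by the patch
            long c = 0;
            int len = Math.min(s.length(), 5);
            for (int i = 0; i < len; i++) {
                c = (c << 6) | (s.charAt(i) & 0x3F); // assumed mapping: low six bits of the character
            }
            for (int i = len; i < 5; i++) c <<= 6;   // right-pad short strings, as in the original
            if (c > Integer.MAX_VALUE) return Integer.MAX_VALUE;
            if (c < 0) return 0;                     // the case the patch now reports before clamping
            return (int) c;
        }

        public static void main(String[] args) {
            System.out.println(lexScore("ZZZZZZZZZZ")); // longer strings are truncated to five characters
            System.out.println(lexScore("abc"));
        }
    }
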
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index adbd10c5a..ed2c143fb 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -304,7 +304,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
rcGlobal.setWordHash(wordHash);
- wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), true);
+ wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), false);
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
}
// the rcGlobal was flushed, empty it
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index c0fbf32f1..d5e6c7b64 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -369,9 +369,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
- int wordCacheMaxLow = (int) getConfigLong("wordCacheMaxLow", 8000);
- int wordCacheMaxHigh = (int) getConfigLong("wordCacheMaxHigh", 10000);
- wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
+ int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
+ wordIndex.setMaxWordCount(wordCacheMaxCount);
// start a cache manager
log.logConfig("Starting HT Cache Manager");
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 47738e91b..2a0ae6d2c 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -90,20 +90,32 @@ public final class plasmaWordIndex {
return databaseRoot;
}
- public int maxURLinWordCache() {
- return ramCache.maxURLinWordCache();
+ public int maxURLinWCache() {
+ return ramCache.maxURLinWCache();
}
- public long minAgeOfWordCache() {
- return ramCache.minAgeOfWordCache();
+ public long minAgeOfWCache() {
+ return ramCache.minAgeOfWCache();
}
- public long maxAgeOfWordCache() {
- return ramCache.maxAgeOfWordCache();
+ public long maxAgeOfWCache() {
+ return ramCache.maxAgeOfWCache();
}
- public int wordCacheRAMSize() {
- return ramCache.wordCacheRAMSize();
+ public long minAgeOfKCache() {
+ return ramCache.minAgeOfKCache();
+ }
+
+ public long maxAgeOfKCache() {
+ return ramCache.maxAgeOfKCache();
+ }
+
+ public int wSize() {
+ return ramCache.wSize();
+ }
+
+ public int kSize() {
+ return ramCache.kSize();
}
public int[] assortmentsSizes() {
@@ -118,48 +130,49 @@ public final class plasmaWordIndex {
return assortmentCluster.cacheFillStatusCml();
}
- public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
- ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
+ public void setMaxWordCount(int maxWords) {
+ ramCache.setMaxWordCount(maxWords);
}
- public void flushControl(boolean highPriority) {
+ public void flushControl(boolean dhtCase) {
// check for forced flush
- if (highPriority) {
- if (ramCache.size() > ramCache.getMaxWordsHigh()) {
- while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
+ ramCache.shiftK2W();
+ if (dhtCase) {
+ if (ramCache.wSize() > ramCache.getMaxWordCount()) {
+ while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
flushCache(1);
}
}
} else {
- while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheReferenceLimit) {
+ while (ramCache.maxURLinWCache() > plasmaWordIndexCache.wCacheReferenceLimit) {
flushCache(1);
}
- if (ramCache.size() > ramCache.getMaxWordsLow()) {
- while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
+ if (ramCache.wSize() > ramCache.getMaxWordCount()) {
+ while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
flushCache(1);
}
}
}
}
- public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean highPriority) {
- if (ramCache.addEntry(wordHash, entry, updateTime)) {
- flushControl(highPriority);
+ public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean dhtCase) {
+ if (ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) {
+ flushControl(dhtCase);
return true;
}
return false;
}
- public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean highPriority) {
- int added = ramCache.addEntries(entries, updateTime, highPriority);
+ public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) {
+ int added = ramCache.addEntries(entries, updateTime, dhtCase);
// force flush
- flushControl(highPriority);
+ flushControl(dhtCase);
return added;
}
public synchronized void flushCacheSome() {
- int flushCount = ramCache.size() / 1000;
+ int flushCount = ramCache.wSize() / 1000;
if (flushCount > 50) flushCount = 50;
if (flushCount < 3) flushCount = 3;
flushCache(flushCount);
@@ -167,7 +180,7 @@ public final class plasmaWordIndex {
public synchronized void flushCache(int count) {
for (int i = 0; i < count; i++) {
- if (ramCache.size() == 0) break;
+ if (ramCache.wSize() == 0) break;
flushCache(ramCache.bestFlushWordHash());
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
@@ -316,7 +329,7 @@ public final class plasmaWordIndex {
public int size() {
return java.lang.Math.max(assortmentCluster.sizeTotal(),
- java.lang.Math.max(backend.size(), ramCache.size()));
+ java.lang.Math.max(backend.size(), ramCache.wSize() + ramCache.kSize()));
}
public int indexSize(String wordHash) {
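
For readability, the new flushControl decision can be reduced to plain counters: in the DHT case only the hard word-count limit is enforced, while the indexing case additionally keeps the largest per-word container below the reference limit. In the sketch below, wSize, maxURLinWCache and flushCache are simplified stand-ins, not the real cache operations; only the 500-entry headroom and the reference limit of 50 are taken from the patch.

    public final class FlushControlSketch {
        // stand-ins for the real cache state (assumed simplification)
        int wSize = 12000;              // words currently in the wCache
        int maxURLinWCache = 80;        // largest number of URLs held for a single word
        int wCacheMaxCount = 10000;     // configured limit (wordCacheMaxCount)
        static final int W_CACHE_REFERENCE_LIMIT = 50;

        void flushCache(int n) {
            // placeholder for flushing n word containers to the assortments/back-end
            wSize -= n;
            if (maxURLinWCache > 1) maxURLinWCache--;
        }

        void flushControl(boolean dhtCase) {
            // the real method calls shiftK2W() here first
            if (dhtCase) {
                // DHT case: only the hard word-count limit is enforced
                if (wSize > wCacheMaxCount)
                    while (wSize + 500 > wCacheMaxCount) flushCache(1);
            } else {
                // indexing case: additionally keep the largest per-word container small
                while (maxURLinWCache > W_CACHE_REFERENCE_LIMIT) flushCache(1);
                if (wSize > wCacheMaxCount)
                    while (wSize + 500 > wCacheMaxCount) flushCache(1);
            }
        }

        public static void main(String[] args) {
            FlushControlSketch s = new FlushControlSketch();
            s.flushControl(false);
            System.out.println("wSize after indexing-case flush: " + s.wSize);
        }
    }
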
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index f85b1104e..e228f0f95 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -56,21 +56,24 @@ import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
-public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
+public final class plasmaWordIndexCache /*implements plasmaWordIndexInterface*/ {
// environment constants
private static final String indexArrayFileName = "indexDump1.array";
- public static final int ramCacheReferenceLimit = 50;
- public static final long ramCacheMaxAge = 1000 * 60 * 60 * 2; // milliseconds; 2 Hours
- public static final long ramCacheMinAge = 1000 * 60 * 2; // milliseconds; 2 Minutes (Karenz for DHT Receive)
+ public static final int wCacheReferenceLimit = 50;
+ public static final long wCacheMaxAge = 1000 * 60 * 60 * 2; // milliseconds; 2 hours
+ public static final long wCacheMinAge = 1000; // milliseconds; 1 second
+ public static final long kCacheMaxAge = 1000 * 60 * 2; // milliseconds; 2 minutes
// class variables
private final File databaseRoot;
- private final TreeMap cache;
+ private final TreeMap wCache; // wordhash-container
+ private final TreeMap kCache; // time-container; for karenz (holding period) / DHT caching (set with high priority)
private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate;
+ private long kCacheInc = 0;
private long startTime;
- private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities
+ private int wCacheMaxCount;
private final serverLog log;
// calculated constants
@@ -85,12 +88,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot;
- this.cache = new TreeMap();
+ this.wCache = new TreeMap();
+ this.kCache = new TreeMap();
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster();
+ this.kCacheInc = 0;
this.startTime = System.currentTimeMillis();
- this.maxWordsLow = 8000;
- this.maxWordsHigh = 10000;
+ this.wCacheMaxCount = 10000;
this.log = log;
// read in dump of last session
@@ -102,7 +106,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private void dump(int waitingSeconds) throws IOException {
- log.logConfig("creating dump for index cache, " + cache.size() + " words (and much more urls)");
+ log.logConfig("creating dump for index cache, " + wCache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexArrayFileName);
if (indexDumpFile.exists()) indexDumpFile.delete();
kelondroArray dumpArray = null;
@@ -110,14 +114,41 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long startTime = System.currentTimeMillis();
long messageTime = System.currentTimeMillis() + 5000;
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
- synchronized (cache) {
- Iterator i = cache.entrySet().iterator();
- Map.Entry entry;
- String wordHash;
- plasmaWordIndexEntryContainer container;
- long updateTime;
- plasmaWordIndexEntry wordEntry;
- byte[][] row = new byte[5][];
+ Map.Entry entry;
+ String wordHash;
+ plasmaWordIndexEntryContainer container;
+ long updateTime;
+ plasmaWordIndexEntry wordEntry;
+ byte[][] row = new byte[5][];
+
+ // write kCache; these entries will be merged into the wCache upon load
+ synchronized (kCache) {
+ Iterator i = kCache.values().iterator();
+ while (i.hasNext()) {
+ container = (plasmaWordIndexEntryContainer) i.next();
+
+ // put entries on stack
+ if (container != null) {
+ Iterator ci = container.entries();
+ while (ci.hasNext()) {
+ wordEntry = (plasmaWordIndexEntry) ci.next();
+ row[0] = container.wordHash().getBytes();
+ row[1] = kelondroRecords.long2bytes(container.size(), 4);
+ row[2] = kelondroRecords.long2bytes(container.updated(), 8);
+ row[3] = wordEntry.getUrlHash().getBytes();
+ row[4] = wordEntry.toEncodedForm().getBytes();
+ dumpArray.set((int) urlcount++, row);
+ }
+ }
+ wordcount++;
+ i.remove(); // free some mem
+
+ }
+ }
+
+ // write wCache
+ synchronized (wCache) {
+ Iterator i = wCache.entrySet().iterator();
while (i.hasNext()) {
// get entries
entry = (Map.Entry) i.next();
@@ -145,7 +176,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (System.currentTimeMillis() > messageTime) {
// System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
- log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
+ log.logInfo("dumping status: " + wordcount + " words done, " + (wCache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
@@ -164,7 +195,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long messageTime = System.currentTimeMillis() + 5000;
long urlCount = 0, urlsPerSecond = 0;
try {
- synchronized (cache) {
+ synchronized (wCache) {
int i = dumpArray.size();
String wordHash;
//long creationTime;
@@ -179,7 +210,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new plasmaWordIndexEntry(new String(row[3], "UTF-8"), new String(row[4], "UTF-8"));
// store to cache
- addEntry(wordHash, wordEntry, startTime);
+ addEntry(wordHash, wordEntry, startTime, false);
urlCount++;
// protect against memory shortage
//while (rt.freeMemory() < 1000000) {flushFromMem(); java.lang.System.gc();}
@@ -194,7 +225,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
dumpArray.close();
- log.logConfig("restored " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
+ log.logConfig("restored " + wCache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
} catch (kelondroException e) {
// restore failed
log.logSevere("restore of indexCache array dump failed: " + e.getMessage(), e);
@@ -206,72 +237,94 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// cache settings
- public int maxURLinWordCache() {
+ public int maxURLinWCache() {
if (hashScore.size() == 0) return 0;
return hashScore.getMaxScore();
}
- public long minAgeOfWordCache() {
+ public long minAgeOfWCache() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
}
- public long maxAgeOfWordCache() {
+ public long maxAgeOfWCache() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
}
- public int wordCacheRAMSize() {
- return cache.size();
+ public long minAgeOfKCache() {
+ if (kCache.size() == 0) return 0;
+ return System.currentTimeMillis() - ((Long) kCache.lastKey()).longValue();
}
- public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
- this.maxWordsLow = maxWordsLow;
- this.maxWordsHigh = maxWordsHigh;
- }
-
- public int getMaxWordsLow() {
- return this.maxWordsLow;
+ public long maxAgeOfKCache() {
+ if (kCache.size() == 0) return 0;
+ return System.currentTimeMillis() - ((Long) kCache.firstKey()).longValue();
}
- public int getMaxWordsHigh() {
- return this.maxWordsHigh;
+ public void setMaxWordCount(int maxWords) {
+ this.wCacheMaxCount = maxWords;
+ }
+
+ public int getMaxWordCount() {
+ return this.wCacheMaxCount;
}
- public int size() {
- return cache.size();
+ public int wSize() {
+ return wCache.size();
+ }
+
+ public int kSize() {
+ return kCache.size();
}
public int indexSize(String wordHash) {
int size = 0;
- plasmaWordIndexEntryContainer cacheIndex = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+ plasmaWordIndexEntryContainer cacheIndex = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
public Iterator wordHashes(String startWordHash, boolean rot) {
if (rot) throw new UnsupportedOperationException("plasmaWordIndexCache cannot rotate");
- return cache.tailMap(startWordHash).keySet().iterator();
+ return wCache.tailMap(startWordHash).keySet().iterator();
}
+ public void shiftK2W() {
+ // find entries in the kCache that are too old to stay there and shift them to the wCache
+ long time;
+ Long l;
+ plasmaWordIndexEntryContainer container;
+ synchronized (kCache) {
+ while (kCache.size() > 0) {
+ l = (Long) kCache.firstKey();
+ time = l.longValue();
+ if (System.currentTimeMillis() - time < kCacheMaxAge) return;
+ container = (plasmaWordIndexEntryContainer) kCache.remove(l);
+ addEntries(container, container.updated(), false);
+ }
+ }
+ }
+
public String bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
- if (cache.size() == 0) return null;
+ shiftK2W();
+ if (wCache.size() == 0) return null;
try {
- synchronized (cache) {
+ synchronized (wCache) {
String hash = null;
int count = hashScore.getMaxScore();
- if ((count > ramCacheReferenceLimit) &&
+ if ((count > wCacheReferenceLimit) &&
((hash = (String) hashScore.getMaxObject()) != null) &&
- (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) > ramCacheMinAge)) {
+ (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) > wCacheMinAge)) {
// flush high-score entries, but not if they are too 'young'
return hash;
}
long oldestTime = longEmit(hashDate.getMinScore());
- if (((System.currentTimeMillis() - oldestTime) > ramCacheMaxAge) &&
+ if (((System.currentTimeMillis() - oldestTime) > wCacheMaxAge) &&
((hash = (String) hashDate.getMinObject()) != null)) {
// flush out-dated entries
return hash;
@@ -280,7 +333,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (Runtime.getRuntime().freeMemory() < 10000000) {
// low-memory case
hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
- if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < ramCacheMinAge) {
+ if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < wCacheMinAge) {
// to young, take it from the oldest entries
hash = (String) hashDate.getMinObject();
}
@@ -297,25 +350,19 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private int intTime(long longTime) {
- return (int) ((longTime - startTime) / 1000);
+ return (int) Math.max(0, ((longTime - startTime) / 1000));
}
private long longEmit(int intTime) {
return (((long) intTime) * (long) 1000) + startTime;
}
- /*
- private long longTime(int intTime) {
- return ((long) intTime) * ((long) 1000) + startTime;
- }
- */
-
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty) {
- return (plasmaWordIndexEntryContainer) cache.get(wordHash);
+ return (plasmaWordIndexEntryContainer) wCache.get(wordHash);
}
public long getUpdateTime(String wordHash) {
- plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+ plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (entries == null) return 0;
return entries.updated();
/*
@@ -327,8 +374,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
- synchronized (cache) {
- plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.remove(wordHash);
+ synchronized (wCache) {
+ plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
@@ -338,7 +385,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
if (urlHashes.length == 0) return 0;
int count = 0;
- synchronized (cache) {
+ synchronized (wCache) {
plasmaWordIndexEntryContainer c = (plasmaWordIndexEntryContainer) deleteContainer(wordHash);
if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete);
@@ -348,12 +395,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return count;
}
+ /*
public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0;
- synchronized (cache) {
+ synchronized (wCache) {
Iterator i = hashDate.scores(false);
String wordHash;
long t;
@@ -362,11 +410,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
wordHash = (String) i.next();
// check time
t = longEmit(hashDate.getScore(wordHash));
- if (System.currentTimeMillis() - t > ramCacheMinAge) return delCount;
+ if (System.currentTimeMillis() - t > wCacheMinAge) return delCount;
// get container
- c = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+ c = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (c.remove(urlHash) != null) {
- cache.put(wordHash, c);
+ wCache.put(wordHash, c);
hashScore.decScore(wordHash);
delCount++;
}
@@ -374,50 +422,87 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
return delCount;
}
+ */
- public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
+ public int tryRemoveURLs(String urlHash) {
+ // this tries to delete an index from the cache that has this
+ // urlHash assigned. This can only work if the entry is really fresh
+ // Such entries must be searched in the latest entries
+ int delCount = 0;
+ synchronized (kCache) {
+ Iterator i = kCache.entrySet().iterator();
+ Map.Entry entry;
+ Long l;
+ plasmaWordIndexEntryContainer c;
+ while (i.hasNext()) {
+ entry = (Map.Entry) i.next();
+ l = (Long) entry.getKey();
+
+ // get container
+ c = (plasmaWordIndexEntryContainer) entry.getValue();
+ if (c.remove(urlHash) != null) {
+ if (c.size() == 0) {
+ i.remove();
+ } else {
+ kCache.put(l, c); // superfluous?
+ }
+ delCount++;
+ }
+ }
+ }
+ return delCount;
+ }
+
+ public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
-
int added = 0;
- // check cache space
-
- //serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
// put new words into cache
- String wordHash = container.wordHash();
- plasmaWordIndexEntryContainer entries = null;
- synchronized (cache) {
- // put container into cache
- entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
+ if (dhtCase) synchronized (kCache) {
+ // put container into kCache
+ kCache.put(new Long(updateTime + kCacheInc), container);
+ kCacheInc++;
+ if (kCacheInc > 10000) kCacheInc = 0;
+ added = container.size();
+ } else synchronized (wCache) {
+ // put container into wCache
+ String wordHash = container.wordHash();
+ plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash);
added = entries.add(container);
if (added > 0) {
- cache.put(wordHash, entries);
+ wCache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
+ entries = null;
}
- entries = null;
return added;
}
- public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime) {
- plasmaWordIndexEntryContainer container = null;
- plasmaWordIndexEntry[] entries = null;
- synchronized (cache) {
- container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+ public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime, boolean dhtCase) {
+ if (dhtCase) synchronized (kCache) {
+ // put container into kCache
+ plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
+ container.add(newEntry);
+ kCache.put(new Long(updateTime + kCacheInc), container);
+ kCacheInc++;
+ if (kCacheInc > 10000) kCacheInc = 0;
+ return true;
+ } else synchronized (wCache) {
+ plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
- entries = new plasmaWordIndexEntry[] { newEntry };
+ plasmaWordIndexEntry[] entries = new plasmaWordIndexEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
- cache.put(wordHash, container);
+ wCache.put(wordHash, container);
hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(updateTime));
return true;
}
+ container = null;
+ entries = null;
+ return false;
}
- container = null;
- entries = null;
- return false;
}
public void close(int waitingSeconds) {
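
The flush-selection policy in bestFlushWordHash can be summarised with plain maps in place of the score clusters: flush the most-referenced word once it exceeds the reference limit and is not too young, otherwise flush the oldest word once it has passed the maximum age. The limits below are the values from the patch; the low-memory fallback and the kCache shift are omitted, and the map-based bookkeeping is an illustration only.

    import java.util.HashMap;
    import java.util.Map;

    public final class FlushPolicySketch {
        static final int  W_CACHE_REFERENCE_LIMIT = 50;           // as in the patch
        static final long W_CACHE_MAX_AGE = 2 * 60 * 60 * 1000L;  // 2 hours
        static final long W_CACHE_MIN_AGE = 1000L;                // 1 second

        // refCount: word hash -> number of cached references
        // lastUpdate: word hash -> last update time in milliseconds (same key set assumed)
        static String bestFlushWordHash(Map<String, Integer> refCount, Map<String, Long> lastUpdate) {
            if (refCount.isEmpty()) return null;
            long now = System.currentTimeMillis();

            // 1) prefer the word with the most references, but only if it is not too young
            String busiest = null;
            int max = -1;
            for (Map.Entry<String, Integer> e : refCount.entrySet())
                if (e.getValue() > max) { max = e.getValue(); busiest = e.getKey(); }
            if (max > W_CACHE_REFERENCE_LIMIT && now - lastUpdate.get(busiest) > W_CACHE_MIN_AGE) return busiest;

            // 2) otherwise flush the oldest word once it has exceeded the maximum age
            String oldest = null;
            long oldestTime = Long.MAX_VALUE;
            for (Map.Entry<String, Long> e : lastUpdate.entrySet())
                if (e.getValue() < oldestTime) { oldestTime = e.getValue(); oldest = e.getKey(); }
            if (oldest != null && now - oldestTime > W_CACHE_MAX_AGE) return oldest;

            return null; // nothing urgent; the real method has further fallbacks
        }

        public static void main(String[] args) {
            Map<String, Integer> refs = new HashMap<>();
            Map<String, Long> updates = new HashMap<>();
            refs.put("busyWordHash", 120); updates.put("busyWordHash", System.currentTimeMillis() - 5000L);
            refs.put("quietWordHash", 3);  updates.put("quietWordHash", System.currentTimeMillis() - 3 * 60 * 60 * 1000L);
            System.out.println("flush candidate: " + bestFlushWordHash(refs, updates));
        }
    }
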
diff --git a/source/de/anomic/plasma/plasmaWordIndexInterface.java b/source/de/anomic/plasma/plasmaWordIndexInterface.java
index d0c5878b8..dc47838a3 100644
--- a/source/de/anomic/plasma/plasmaWordIndexInterface.java
+++ b/source/de/anomic/plasma/plasmaWordIndexInterface.java
@@ -55,7 +55,8 @@ public interface plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer deleteContainer(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
- public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
+ public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean dhtCase);
+ public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
public void close(int waitingSeconds);
diff --git a/yacy.init b/yacy.init
index 9d7f71ecd..096d96500 100644
--- a/yacy.init
+++ b/yacy.init
@@ -310,18 +310,6 @@ yacyDebugMode=false
#staticIP if you have a static IP, you can use this setting
staticIP=
-# if the process is running behind a NAT or ROUTER, we cannot easily identify
-# the public IP of the process. We can ask a public IP responder, but cannot
-# rely on it. Therefore, AnomicHTTPProxy includes it's own responder.
-# But for the first running peer this is not an option.
-# The author uses a DI-604 router, which can be
-# asked for the public IP. If you own a DI-604 as well, please set the
-# DI604use to true and put in your router password, it will not be used for any
-# other purpose of asking for the IP
-#DI604use=true
-DI604use=false
-DI604pw=
-
# each time the proxy starts up, it can trigger the local browser to show the
# status page. This is active by default, to make it easier for first-time
# users to understand what this application does. You can disable browser
@@ -513,7 +501,6 @@ javastart_Xmx=Xmx64m
# -Xms set initial Java heap size
javastart_Xms=Xms10m
-
# performance properties for the word index cache
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be
@@ -522,9 +509,7 @@ javastart_Xms=Xms10m
# remote index transmissions and search requests
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
-wordCacheMaxLow = 12000
-wordCacheMaxHigh = 16000
-maxWaitingWordFlush = 180
+wordCacheMaxCount = 12000
# Specifies if yacy can be used as transparent http proxy.
#