introduced a second RAM cache for DHT transfer

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1880 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 360a460da8
commit bcd99fe83e

@ -69,53 +69,59 @@ Changes take effect immediately</td>
<div class=small><b>Indexing Cache Settings:</b></div>
<form action="PerformanceQueues_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellpadding="5" cellspacing="1" width="100%">
<tr valign="top" class="TableHeader">
<td class=small>Cache Type</td>
<td class=small>Indexing</td>
<td class=small>DHT</td>
<td class=small>Description</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Words in RAM Cache:</td>
<td class=small>#[wordCacheRAMSize]#</td>
<td class=small>Words in RAM cache:</td>
<td class=small>#[wordCacheWSize]#</td>
<td class=small>#[wordCacheKSize]#</td>
<td class=small>
This is the current size of the word cache.
The smaller this number, the faster the shut-down procedure will be.
The maximum of this cache can be set below.
This is the current size of the word caches.
The indexing cache speeds up the indexing process; the DHT cache holds indexes temporarily for approval.
The maximum size of these caches can be set below.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum URLs currently assigned<br>to one cached word:</td>
<td class=small>#[maxURLinWordCache]#</td>
<td class=small>#[maxURLinWCache]#</td>
<td class=small>not controlled<br>for DHT cache</td>
<td class=small>
This is the maximum number of URLs assigned to a single word cache entry.
If this number is large, it indicates that the caching works efficiently.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum Age of Word in cache:</td>
<td class=small>#[maxAgeOfWordCache]#</td>
<td class=small>Maximum age of a word:</td>
<td class=small>#[maxAgeOfWCache]#</td>
<td class=small>#[maxAgeOfKCache]#</td>
<td class=small>
This is the maximum age of a word index that is in the RAM cache in minutes.
This is the maximum age, in minutes, of a word in the index cache.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Minimum Age of Word in cache:</td>
<td class=small>#[minAgeOfWordCache]#</td>
<td class=small>Minimum age of a word:</td>
<td class=small>#[minAgeOfWCache]#</td>
<td class=small>#[minAgeOfKCache]#</td>
<td class=small>
This is the minimum age of a word index that is in the RAM cache in minutes.
This is the minimum age, in minutes, of a word in the index cache.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches, low limit:</td>
<td class=small><input name="wordCacheMaxLow" type="text" size="20" maxlength="100" value="#[wordCacheMaxLow]#"></td>
<td class=small>Maximum number of words in cache:</td>
<td class=small><input name="wordCacheMaxCount" type="text" size="20" maxlength="100" value="#[wordCacheMaxCount]#"></td>
<td class=small>cannot be set for DHT</td>
<td class=small rowspan="2">
This is the number of word indexes that shall be held in the
RAM cache during indexing. When YaCy is shut down, this cache must be
flushed to disc; this may last some minutes. The low limit is valid for crawling tasks, the high limit is valid
for search and DHT transmission tasks.
flushed to disc; this may take a few minutes.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches, high limit:</td>
<td class=small><input name="wordCacheMaxHigh" type="text" size="20" maxlength="100" value="#[wordCacheMaxHigh]#"></td>
</tr>
<tr valign="top" class="TableCellLight">
<td class=small colspan="3">
<td class=small colspan="4">
<input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size">
Changes take effect immediately</td>
</tr>
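
The settings table above describes the new split into an indexing cache (wCache) and a DHT cache (kCache), and the single wordCacheMaxCount limit that replaces the old low/high pair. As a rough illustration of how a count-bounded word cache behaves, here is a minimal sketch with hypothetical, simplified types; it is not YaCy's plasmaWordIndexCache:

import java.util.TreeMap;

// Minimal sketch (hypothetical types): a word cache bounded by a single
// maximum word count, flushed entry by entry once the limit is exceeded.
class BoundedWordCache {
    private final TreeMap<String, Integer> cache = new TreeMap<>(); // wordHash -> URL reference count
    private int maxWordCount = 10000; // corresponds to the wordCacheMaxCount setting

    void setMaxWordCount(int max) { this.maxWordCount = max; }

    void addReference(String wordHash) {
        cache.merge(wordHash, 1, Integer::sum);
        while (cache.size() > maxWordCount) flushOne();
    }

    private void flushOne() {
        // The real code picks the oldest or highest-scoring word hash to flush
        // and writes it to the on-disk assortments; this sketch just drops the first key.
        cache.remove(cache.firstKey());
    }

    int size() { return cache.size(); }
}

In the actual implementation the flush victim is chosen by score and age (see bestFlushWordHash further down in this commit), not by key order.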

@ -142,15 +142,14 @@ public class PerformanceQueues_p {
idlesleep = Long.parseLong(d((String) defaultSettings.get(threadName + "_idlesleep"), "1000"));
busysleep = Long.parseLong(d((String) defaultSettings.get(threadName + "_busysleep"), "100"));
memprereq = Long.parseLong(d((String) defaultSettings.get(threadName + "_memprereq"), "0"));
// check values to prevent short-cut loops
if (idlesleep < 1000) idlesleep = 1000;
if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; }
if ((threadName.equals("50_localcrawl")) && (busysleep < 100)) busysleep = 100;
if ((threadName.equals("61_globalcrawltrigger")) && (busysleep < 100)) busysleep = 100;
if ((threadName.equals("62_remotetriggeredcrawl")) && (busysleep < 100)) busysleep = 100;
// on-the-fly re-configuration
switchboard.setThreadPerformance(threadName, idlesleep, busysleep, memprereq);
switchboard.setConfig(threadName + "_idlesleep", idlesleep);
@ -171,12 +170,9 @@ public class PerformanceQueues_p {
prop.put("table", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
int wordCacheMaxLow = post.getInt("wordCacheMaxLow", 8000);
int wordCacheMaxHigh = post.getInt("wordCacheMaxHigh", 10000);
if (wordCacheMaxLow > wordCacheMaxHigh) wordCacheMaxLow = wordCacheMaxHigh;
switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow));
switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh));
switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
int wordCacheMaxCount = post.getInt("wordCacheMaxCount", 10000);
switchboard.setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount));
switchboard.wordIndex.setMaxWordCount(wordCacheMaxCount);
int maxWaitingWordFlush = post.getInt("maxWaitingWordFlush", 180);
switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush));
}
@ -251,13 +247,15 @@ public class PerformanceQueues_p {
}
// table cache settings
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxAgeOfWordCache", "" + (switchboard.wordIndex.maxAgeOfWordCache() / 1000 / 60)); // minutes
prop.put("minAgeOfWordCache", "" + (switchboard.wordIndex.minAgeOfWordCache() / 1000 / 60)); // minutes
prop.put("wordCacheWSize", switchboard.wordIndex.wSize());
prop.put("wordCacheKSize", switchboard.wordIndex.kSize());
prop.put("maxURLinWCache", "" + switchboard.wordIndex.maxURLinWCache());
prop.put("maxAgeOfWCache", "" + (switchboard.wordIndex.maxAgeOfWCache() / 1000 / 60)); // minutes
prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfWCache() / 1000 / 60)); // minutes
prop.put("maxAgeOfKCache", "" + (switchboard.wordIndex.maxAgeOfKCache() / 1000 / 60)); // minutes
prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfKCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000"));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);
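
The age values above are handed to the template in minutes via the / 1000 / 60 division. A quick worked example with the default constants that appear later in plasmaWordIndexCache (a throwaway snippet, not project code):

public class AgeToMinutes {
    public static void main(String[] args) {
        // Worked example of the "/ 1000 / 60" conversion used above.
        long wCacheMaxAge = 1000 * 60 * 60 * 2; // 7,200,000 ms = 2 hours
        long kCacheMaxAge = 1000 * 60 * 2;      // 120,000 ms = 2 minutes
        System.out.println(wCacheMaxAge / 1000 / 60); // prints 120 (minutes)
        System.out.println(kCacheMaxAge / 1000 / 60); // prints 2 (minutes)
    }
}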

@ -64,7 +64,7 @@ public class status_p {
prop.put("rejected", 0);
yacyCore.peerActions.updateMySeed();
prop.put("ppm", yacyCore.seedDB.mySeed.get(yacySeed.ISPEED, "unknown"));
prop.put("wordCacheSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("wordCacheSize", switchboard.wordIndex.wSize() + switchboard.wordIndex.kSize());
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));

@ -27,7 +27,6 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
public class kelondroCollectionIndex {

@ -73,17 +73,9 @@ public final class kelondroMScoreCluster {
} catch (ParseException e) {}
}
/*
public static int string2score(String s) {
int i = string2scoreX(s);
System.out.println("string2core(" + s + ") = " + i);
return i;
}
*/
public static int string2score(String s) {
// this can be used to calculate a score from a string
if ((s == null) || (s.length() == 0) || (s.charAt(0) == '-')) return 0;
try {
long l = 0;
if (s.length() == shortDateFormatString.length()) {
@ -97,7 +89,10 @@ public final class kelondroMScoreCluster {
}
// fix out-of-ranges
if (l > Integer.MAX_VALUE) return Integer.MAX_VALUE;
if (l < 0) return 0;
if (l < 0) {
System.out.println("string2score: negative score for input " + s);
return 0;
}
return (int) l;
} catch (Exception e) {
// try it lex
@ -110,7 +105,10 @@ public final class kelondroMScoreCluster {
}
for (int i = len; i < 5; i++) c <<= 6;
if (c > Integer.MAX_VALUE) return Integer.MAX_VALUE;
if (c < 0) return 0;
if (c < 0) {
System.out.println("string2score: negative score for input " + s);
return 0;
}
return c;
}
}
@ -411,14 +409,18 @@ public final class kelondroMScoreCluster {
public static void main(String[] args) {
if (args.length > 0) System.out.println("score of " + args[0] + ": " + string2score(args[0]));
//System.exit(0);
String t = "ZZZZZZZZZZ";
System.out.println("score of " + t + ": " + string2score(t));
if (args.length > 0) {
System.out.println("score of " + args[0] + ": " + string2score(args[0]));
System.exit(0);
}
System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster();
long c = 0;
long c = 0;
// create cluster
// create cluster
long time = System.currentTimeMillis();
Random random = new Random(1234);
int r;
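
The lexical fallback of string2score shown above packs at most the first five characters into 6-bit groups, pads shorter strings so prefixes compare consistently, and clamps the result to a non-negative int. A simplified, hypothetical re-creation follows; the real code uses YaCy's own character ordering rather than the plain bit masking used here:

public class LexicalScoreSketch {
    // Simplified sketch of the lexical fallback in string2score:
    // up to five characters, 6 bits each, padded and clamped to the int range.
    static int lexicalScore(String s) {
        long c = 0;
        int len = Math.min(5, s.length());
        for (int i = 0; i < len; i++) {
            c = (c << 6) | (s.charAt(i) & 0x3F); // hypothetical 6-bit mapping
        }
        for (int i = len; i < 5; i++) c <<= 6;   // pad short strings
        if (c > Integer.MAX_VALUE) return Integer.MAX_VALUE;
        return (int) Math.max(0, c);             // never return a negative score
    }

    public static void main(String[] args) {
        System.out.println(lexicalScore("ZZZZZZZZZZ")); // large positive score
        System.out.println(lexicalScore("a"));          // small score, padded with zero bits
    }
}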

@ -304,7 +304,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
rcGlobal.setWordHash(wordHash);
wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), true);
wordIndex.addEntries(rcGlobal, System.currentTimeMillis(), false);
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
}
// the rcGlobal was flushed, empty it

@ -369,9 +369,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
int wordCacheMaxLow = (int) getConfigLong("wordCacheMaxLow", 8000);
int wordCacheMaxHigh = (int) getConfigLong("wordCacheMaxHigh", 10000);
wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
wordIndex.setMaxWordCount(wordCacheMaxCount);
// start a cache manager
log.logConfig("Starting HT Cache Manager");

@ -90,20 +90,32 @@ public final class plasmaWordIndex {
return databaseRoot;
}
public int maxURLinWordCache() {
return ramCache.maxURLinWordCache();
public int maxURLinWCache() {
return ramCache.maxURLinWCache();
}
public long minAgeOfWordCache() {
return ramCache.minAgeOfWordCache();
public long minAgeOfWCache() {
return ramCache.minAgeOfWCache();
}
public long maxAgeOfWordCache() {
return ramCache.maxAgeOfWordCache();
public long maxAgeOfWCache() {
return ramCache.maxAgeOfWCache();
}
public int wordCacheRAMSize() {
return ramCache.wordCacheRAMSize();
public long minAgeOfKCache() {
return ramCache.minAgeOfKCache();
}
public long maxAgeOfKCache() {
return ramCache.maxAgeOfKCache();
}
public int wSize() {
return ramCache.wSize();
}
public int kSize() {
return ramCache.kSize();
}
public int[] assortmentsSizes() {
@ -118,48 +130,49 @@ public final class plasmaWordIndex {
return assortmentCluster.cacheFillStatusCml();
}
public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
public void setMaxWordCount(int maxWords) {
ramCache.setMaxWordCount(maxWords);
}
public void flushControl(boolean highPriority) {
public void flushControl(boolean dhtCase) {
// check for forced flush
if (highPriority) {
if (ramCache.size() > ramCache.getMaxWordsHigh()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) {
ramCache.shiftK2W();
if (dhtCase) {
if (ramCache.wSize() > ramCache.getMaxWordCount()) {
while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
flushCache(1);
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheReferenceLimit) {
while (ramCache.maxURLinWCache() > plasmaWordIndexCache.wCacheReferenceLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
while (ramCache.size() + 500 > ramCache.getMaxWordsLow()) {
if (ramCache.wSize() > ramCache.getMaxWordCount()) {
while (ramCache.wSize() + 500 > ramCache.getMaxWordCount()) {
flushCache(1);
}
}
}
}
public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean highPriority) {
if (ramCache.addEntry(wordHash, entry, updateTime)) {
flushControl(highPriority);
public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean dhtCase) {
if (ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) {
flushControl(dhtCase);
return true;
}
return false;
}
public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean highPriority) {
int added = ramCache.addEntries(entries, updateTime, highPriority);
public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) {
int added = ramCache.addEntries(entries, updateTime, dhtCase);
// force flush
flushControl(highPriority);
flushControl(dhtCase);
return added;
}
public synchronized void flushCacheSome() {
int flushCount = ramCache.size() / 1000;
int flushCount = ramCache.wSize() / 1000;
if (flushCount > 50) flushCount = 50;
if (flushCount < 3) flushCount = 3;
flushCache(flushCount);
@ -167,7 +180,7 @@ public final class plasmaWordIndex {
public synchronized void flushCache(int count) {
for (int i = 0; i < count; i++) {
if (ramCache.size() == 0) break;
if (ramCache.wSize() == 0) break;
flushCache(ramCache.bestFlushWordHash());
try {Thread.sleep(10);} catch (InterruptedException e) {}
}
@ -316,7 +329,7 @@ public final class plasmaWordIndex {
public int size() {
return java.lang.Math.max(assortmentCluster.sizeTotal(),
java.lang.Math.max(backend.size(), ramCache.size()));
java.lang.Math.max(backend.size(), ramCache.wSize() + ramCache.kSize()));
}
public int indexSize(String wordHash) {

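One detail of flushControl above is easy to miss: the outer if only fires once wSize exceeds wordCacheMaxCount, but the inner while then flushes until the cache is roughly 500 entries below the limit, so flushing happens in bursts rather than on every insert. Restated as a standalone sketch with hypothetical names and a plain TreeMap standing in for the word cache:

import java.util.TreeMap;

class FlushHysteresisSketch {
    // Hypothetical sketch of the burst-flush behaviour in flushControl:
    // nothing is flushed until the size exceeds the limit; once it does,
    // entries are flushed until the cache is about 500 entries below the limit.
    static void enforceLimit(TreeMap<String, Object> wCache, int maxWordCount) {
        if (wCache.size() > maxWordCount) {
            // the isEmpty guard mirrors the wSize() == 0 check in flushCache
            while (!wCache.isEmpty() && wCache.size() + 500 > maxWordCount) {
                wCache.remove(wCache.firstKey()); // stands in for flushCache(1)
            }
        }
    }
}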
@ -56,21 +56,24 @@ import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public final class plasmaWordIndexCache /*implements plasmaWordIndexInterface*/ {
// environment constants
private static final String indexArrayFileName = "indexDump1.array";
public static final int ramCacheReferenceLimit = 50;
public static final long ramCacheMaxAge = 1000 * 60 * 60 * 2; // milliseconds; 2 Hours
public static final long ramCacheMinAge = 1000 * 60 * 2; // milliseconds; 2 Minutes (Karenz for DHT Receive)
public static final int wCacheReferenceLimit = 50;
public static final long wCacheMaxAge = 1000 * 60 * 60 * 2; // milliseconds; 2 hours
public static final long wCacheMinAge = 1000; // milliseconds; 1 second
public static final long kCacheMaxAge = 1000 * 60 * 2; // milliseconds; 2 minutes
// class variables
private final File databaseRoot;
private final TreeMap cache;
private final TreeMap wCache; // wordhash-container
private final TreeMap kCache; // time-container; for karenz/DHT caching (set with high priority)
private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate;
private long kCacheInc = 0;
private long startTime;
private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities
private int wCacheMaxCount;
private final serverLog log;
// calculated constants
@ -85,12 +88,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot;
this.cache = new TreeMap();
this.wCache = new TreeMap();
this.kCache = new TreeMap();
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster();
this.kCacheInc = 0;
this.startTime = System.currentTimeMillis();
this.maxWordsLow = 8000;
this.maxWordsHigh = 10000;
this.wCacheMaxCount = 10000;
this.log = log;
// read in dump of last session
@ -102,7 +106,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private void dump(int waitingSeconds) throws IOException {
log.logConfig("creating dump for index cache, " + cache.size() + " words (and much more urls)");
log.logConfig("creating dump for index cache, " + wCache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexArrayFileName);
if (indexDumpFile.exists()) indexDumpFile.delete();
kelondroArray dumpArray = null;
@ -110,14 +114,41 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long startTime = System.currentTimeMillis();
long messageTime = System.currentTimeMillis() + 5000;
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
synchronized (cache) {
Iterator i = cache.entrySet().iterator();
Map.Entry entry;
String wordHash;
plasmaWordIndexEntryContainer container;
long updateTime;
plasmaWordIndexEntry wordEntry;
byte[][] row = new byte[5][];
Map.Entry entry;
String wordHash;
plasmaWordIndexEntryContainer container;
long updateTime;
plasmaWordIndexEntry wordEntry;
byte[][] row = new byte[5][];
// write kCache, this will be melted with the wCache upon load
synchronized (kCache) {
Iterator i = kCache.values().iterator();
while (i.hasNext()) {
container = (plasmaWordIndexEntryContainer) i.next();
// put entries on stack
if (container != null) {
Iterator ci = container.entries();
while (ci.hasNext()) {
wordEntry = (plasmaWordIndexEntry) ci.next();
row[0] = container.wordHash().getBytes();
row[1] = kelondroRecords.long2bytes(container.size(), 4);
row[2] = kelondroRecords.long2bytes(container.updated(), 8);
row[3] = wordEntry.getUrlHash().getBytes();
row[4] = wordEntry.toEncodedForm().getBytes();
dumpArray.set((int) urlcount++, row);
}
}
wordcount++;
i.remove(); // free some mem
}
}
// write wCache
synchronized (wCache) {
Iterator i = wCache.entrySet().iterator();
while (i.hasNext()) {
// get entries
entry = (Map.Entry) i.next();
@ -145,7 +176,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (System.currentTimeMillis() > messageTime) {
// System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
log.logInfo("dumping status: " + wordcount + " words done, " + (wCache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
@ -164,7 +195,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
long messageTime = System.currentTimeMillis() + 5000;
long urlCount = 0, urlsPerSecond = 0;
try {
synchronized (cache) {
synchronized (wCache) {
int i = dumpArray.size();
String wordHash;
//long creationTime;
@ -179,7 +210,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new plasmaWordIndexEntry(new String(row[3], "UTF-8"), new String(row[4], "UTF-8"));
// store to cache
addEntry(wordHash, wordEntry, startTime);
addEntry(wordHash, wordEntry, startTime, false);
urlCount++;
// protect against memory shortage
//while (rt.freeMemory() < 1000000) {flushFromMem(); java.lang.System.gc();}
@ -194,7 +225,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
dumpArray.close();
log.logConfig("restored " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
log.logConfig("restored " + wCache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
} catch (kelondroException e) {
// restore failed
log.logSevere("restore of indexCache array dump failed: " + e.getMessage(), e);
@ -206,72 +237,94 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// cache settings
public int maxURLinWordCache() {
public int maxURLinWCache() {
if (hashScore.size() == 0) return 0;
return hashScore.getMaxScore();
}
public long minAgeOfWordCache() {
public long minAgeOfWCache() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
}
public long maxAgeOfWordCache() {
public long maxAgeOfWCache() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
}
public int wordCacheRAMSize() {
return cache.size();
public long minAgeOfKCache() {
if (kCache.size() == 0) return 0;
return System.currentTimeMillis() - ((Long) kCache.lastKey()).longValue();
}
public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
this.maxWordsLow = maxWordsLow;
this.maxWordsHigh = maxWordsHigh;
}
public int getMaxWordsLow() {
return this.maxWordsLow;
public long maxAgeOfKCache() {
if (kCache.size() == 0) return 0;
return System.currentTimeMillis() - ((Long) kCache.firstKey()).longValue();
}
public int getMaxWordsHigh() {
return this.maxWordsHigh;
public void setMaxWordCount(int maxWords) {
this.wCacheMaxCount = maxWords;
}
public int getMaxWordCount() {
return this.wCacheMaxCount;
}
public int size() {
return cache.size();
public int wSize() {
return wCache.size();
}
public int kSize() {
return kCache.size();
}
public int indexSize(String wordHash) {
int size = 0;
plasmaWordIndexEntryContainer cacheIndex = (plasmaWordIndexEntryContainer) cache.get(wordHash);
plasmaWordIndexEntryContainer cacheIndex = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
public Iterator wordHashes(String startWordHash, boolean rot) {
if (rot) throw new UnsupportedOperationException("plasmaWordIndexCache cannot rotate");
return cache.tailMap(startWordHash).keySet().iterator();
return wCache.tailMap(startWordHash).keySet().iterator();
}
public void shiftK2W() {
// find entries in kCache that are too old for that place and shift them to the wCache
long time;
Long l;
plasmaWordIndexEntryContainer container;
synchronized (kCache) {
while (kCache.size() > 0) {
l = (Long) kCache.firstKey();
time = l.longValue();
if (System.currentTimeMillis() - time < kCacheMaxAge) return;
container = (plasmaWordIndexEntryContainer) kCache.remove(l);
addEntries(container, container.updated(), false);
}
}
}
public String bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
if (cache.size() == 0) return null;
shiftK2W();
if (wCache.size() == 0) return null;
try {
synchronized (cache) {
synchronized (wCache) {
String hash = null;
int count = hashScore.getMaxScore();
if ((count > ramCacheReferenceLimit) &&
if ((count > wCacheReferenceLimit) &&
((hash = (String) hashScore.getMaxObject()) != null) &&
(System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) > ramCacheMinAge)) {
(System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) > wCacheMinAge)) {
// flush high-score entries, but not if they are too 'young'
return hash;
}
long oldestTime = longEmit(hashDate.getMinScore());
if (((System.currentTimeMillis() - oldestTime) > ramCacheMaxAge) &&
if (((System.currentTimeMillis() - oldestTime) > wCacheMaxAge) &&
((hash = (String) hashDate.getMinObject()) != null)) {
// flush out-dated entries
return hash;
@ -280,7 +333,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (Runtime.getRuntime().freeMemory() < 10000000) {
// low-memory case
hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < ramCacheMinAge) {
if (System.currentTimeMillis() - longEmit(hashDate.getScore(hash)) < wCacheMinAge) {
// too young, take it from the oldest entries
hash = (String) hashDate.getMinObject();
}
@ -297,25 +350,19 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
private int intTime(long longTime) {
return (int) ((longTime - startTime) / 1000);
return (int) Math.max(0, ((longTime - startTime) / 1000));
}
private long longEmit(int intTime) {
return (((long) intTime) * (long) 1000) + startTime;
}
/*
private long longTime(int intTime) {
return ((long) intTime) * ((long) 1000) + startTime;
}
*/
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty) {
return (plasmaWordIndexEntryContainer) cache.get(wordHash);
return (plasmaWordIndexEntryContainer) wCache.get(wordHash);
}
public long getUpdateTime(String wordHash) {
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash);
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (entries == null) return 0;
return entries.updated();
/*
@ -327,8 +374,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
synchronized (cache) {
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.remove(wordHash);
synchronized (wCache) {
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
@ -338,7 +385,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
if (urlHashes.length == 0) return 0;
int count = 0;
synchronized (cache) {
synchronized (wCache) {
plasmaWordIndexEntryContainer c = (plasmaWordIndexEntryContainer) deleteContainer(wordHash);
if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete);
@ -348,12 +395,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return count;
}
/*
public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0;
synchronized (cache) {
synchronized (wCache) {
Iterator i = hashDate.scores(false);
String wordHash;
long t;
@ -362,11 +410,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
wordHash = (String) i.next();
// check time
t = longEmit(hashDate.getScore(wordHash));
if (System.currentTimeMillis() - t > ramCacheMinAge) return delCount;
if (System.currentTimeMillis() - t > wCacheMinAge) return delCount;
// get container
c = (plasmaWordIndexEntryContainer) cache.get(wordHash);
c = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (c.remove(urlHash) != null) {
cache.put(wordHash, c);
wCache.put(wordHash, c);
hashScore.decScore(wordHash);
delCount++;
}
@ -374,50 +422,87 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
return delCount;
}
*/
public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0;
synchronized (kCache) {
Iterator i = kCache.entrySet().iterator();
Map.Entry entry;
Long l;
plasmaWordIndexEntryContainer c;
while (i.hasNext()) {
entry = (Map.Entry) i.next();
l = (Long) entry.getKey();
// get container
c = (plasmaWordIndexEntryContainer) entry.getValue();
if (c.remove(urlHash) != null) {
if (c.size() == 0) {
i.remove();
} else {
kCache.put(l, c); // superfluous?
}
delCount++;
}
}
}
return delCount;
}
public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
// check cache space
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
// put new words into cache
String wordHash = container.wordHash();
plasmaWordIndexEntryContainer entries = null;
synchronized (cache) {
// put container into cache
entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (dhtCase) synchronized (kCache) {
// put container into kCache
kCache.put(new Long(updateTime + kCacheInc), container);
kCacheInc++;
if (kCacheInc > 10000) kCacheInc = 0;
added = container.size();
} else synchronized (wCache) {
// put container into wCache
String wordHash = container.wordHash();
plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash);
added = entries.add(container);
if (added > 0) {
cache.put(wordHash, entries);
wCache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
}
entries = null;
return added;
}
public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime) {
plasmaWordIndexEntryContainer container = null;
plasmaWordIndexEntry[] entries = null;
synchronized (cache) {
container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime, boolean dhtCase) {
if (dhtCase) synchronized (kCache) {
// put container into kCache
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
container.add(newEntry);
kCache.put(new Long(updateTime + kCacheInc), container);
kCacheInc++;
if (kCacheInc > 10000) kCacheInc = 0;
return true;
} else synchronized (wCache) {
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
entries = new plasmaWordIndexEntry[] { newEntry };
plasmaWordIndexEntry[] entries = new plasmaWordIndexEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
wCache.put(wordHash, container);
hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(updateTime));
return true;
}
container = null;
entries = null;
return false;
}
container = null;
entries = null;
return false;
}
public void close(int waitingSeconds) {

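Taken together, the kCache introduced above is keyed by time rather than word hash: DHT containers are stored under updateTime + kCacheInc (the counter only keeps keys unique and wraps at 10000), and shiftK2W later moves containers older than kCacheMaxAge into the regular wCache, where the normal flush rules apply. A compact, hypothetical model of that lifecycle, with a simplified container type instead of the real plasmaWordIndexEntryContainer:

import java.util.TreeMap;

class KCacheSketch {
    static final long K_CACHE_MAX_AGE = 1000 * 60 * 2; // 2 minutes, as in plasmaWordIndexCache

    static class Container { String wordHash; Container(String h) { wordHash = h; } }

    private final TreeMap<Long, Container> kCache = new TreeMap<>();   // arrival time -> container
    private final TreeMap<String, Container> wCache = new TreeMap<>(); // word hash -> container
    private long kCacheInc = 0;

    // DHT receive path: key by (updateTime + counter) so equal timestamps cannot collide;
    // the counter only disambiguates keys and wraps at 10000.
    void addDhtContainer(Container container, long updateTime) {
        kCache.put(updateTime + kCacheInc, container);
        kCacheInc++;
        if (kCacheInc > 10000) kCacheInc = 0;
    }

    // Called before flushing: containers that have waited at least kCacheMaxAge
    // are moved into the regular word cache, where the normal flush rules apply.
    void shiftK2W() {
        while (!kCache.isEmpty()) {
            long arrival = kCache.firstKey();
            if (System.currentTimeMillis() - arrival < K_CACHE_MAX_AGE) return; // oldest entry still too young
            Container c = kCache.remove(arrival);
            wCache.put(c.wordHash, c); // the real code re-inserts via addEntries(..., false)
        }
    }
}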
@ -55,7 +55,8 @@ public interface plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer deleteContainer(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean dhtCase);
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
public void close(int waitingSeconds);

@ -310,18 +310,6 @@ yacyDebugMode=false
#staticIP if you have a static IP, you can use this setting
staticIP=
# if the process is running behind a NAT or ROUTER, we cannot easily identify
# the public IP of the process. We can ask a public IP responder, but cannot
# rely on it. Therefore, AnomicHTTPProxy includes it's own responder.
# But for the first running peer this is not an option.
# The author uses a DI-604 router, which can be
# asked for the public IP. If you own a DI-604 as well, please set the
# DI604use to true and put in your router password, it will not be used for any
# other purpose of asking for the IP
#DI604use=true
DI604use=false
DI604pw=
# each time the proxy starts up, it can trigger the local browser to show the
# status page. This is active by default, to make it easier for first-time
# users to understand what this application does. You can disable browser
@ -513,7 +501,6 @@ javastart_Xmx=Xmx64m
# -Xms<size> set initial Java heap size
javastart_Xms=Xms10m
# performance properties for the word index cache
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be
@ -522,9 +509,7 @@ javastart_Xms=Xms10m
# remote index transmissions and search requests
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
wordCacheMaxLow = 12000
wordCacheMaxHigh = 16000
maxWaitingWordFlush = 180
wordCacheMaxCount = 12000
# Specifies if yacy can be used as transparent http proxy.
#
