removed the distinction between DHT-in and DHT-out caches. This is necessary to make room for the new cell data structure, which cannot use this distinction in the first place, but will later enable the same semantics through a different mechanism (segments).
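A minimal before/after sketch of the API change, assembled from the plasmaWordIndex hunks further down (signatures as they appear in this diff; the surrounding class is omitted):

// before: two RAM caches, entries routed per word hash via yacySeed.shallBeOwnWord(...)
public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime, boolean dhtInCase);
public int dhtOutCacheSize();
public int dhtInCacheSize();

// after: one unified cache; the in/out semantics will later be expressed via segments
public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime);
public int dhtCacheSize();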

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5511 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 34da04c7dd
commit dedfc7df7f

@@ -68,14 +68,13 @@
<fieldset><legend>Cache Settings:</legend>
<table border="0" cellpadding="5" cellspacing="1">
<tr valign="top" class="TableHeader">
<td>Cache Type</td>
<td>DHT-Out</td>
<td>DHT-In</td>
<td></td>
<td>RAM Cache</td>
<td>Description</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>URLs in RAM buffer:</td>
<td colspan="2" align="center">#[urlCacheSize]#</td>
<td align="center">#[urlCacheSize]#</td>
<td>
This is the size of the URL write buffer. Its purpose is to buffer incoming URLs
during search-result transmission and during DHT transfer.
@@ -83,8 +82,7 @@
</tr>
<tr valign="top" class="TableCellDark">
<td>Words in RAM cache:<br />(Size in KBytes)</td>
<td>#[wordCacheWSize]#<br />(#[wordCacheWSizeKBytes]# KB)</td>
<td>#[wordCacheKSize]#<br />(#[wordCacheKSizeKBytes]# KB)</td>
<td>#[wordCacheSize]#<br />(#[wordCacheSizeKBytes]# KB)</td>
<td>
This is the current size of the word caches.
The indexing cache speeds up the indexing process; the DHT cache temporarily holds indexes for approval.
@@ -93,8 +91,7 @@
</tr>
<tr valign="top" class="TableCellDark">
<td>Maximum URLs currently assigned<br />to one cached word:</td>
<td>#[maxURLinWCache]#</td>
<td>#[maxURLinKCache]#</td>
<td>#[maxURLinCache]#</td>
<td>
This is the maximum number of URLs assigned to a single word cache entry.
A large number here indicates that the caching works efficiently.
@@ -102,23 +99,21 @@
</tr>
<tr valign="top" class="TableCellDark">
<td>Maximum age of a word:</td>
<td>#[maxAgeOfWCache]#</td>
<td>#[maxAgeOfKCache]#</td>
<td>#[maxAgeOfCache]#</td>
<td>
This is the maximum age of a word in an index in minutes.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Minimum age of a word:</td>
<td>#[minAgeOfWCache]#</td>
<td>#[minAgeOfKCache]#</td>
<td>#[minAgeOfCache]#</td>
<td>
This is the minimum age of a word in an index in minutes.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Maximum number of words in cache:</td>
<td colspan="2">
<td>
<input name="wordCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordCacheMaxCount]#" />
</td>
<td>
@@ -129,7 +124,7 @@
</tr>
<tr valign="top" class="TableCellDark">
<td>Initial space of words in cache:</td>
<td colspan="2">
<td>
<input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" />
</td>
<td>

@@ -286,16 +286,11 @@ public class PerformanceQueues_p {
// table cache settings
prop.putNum("urlCacheSize", switchboard.webIndex.getURLwriteCacheSize());
prop.putNum("wordCacheWSize", switchboard.webIndex.dhtOutCacheSize());
prop.putNum("wordCacheKSize", switchboard.webIndex.dhtInCacheSize());
prop.putNum("wordCacheWSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(false)/1024);
prop.putNum("wordCacheKSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(true)/1024);
prop.putNum("maxURLinWCache", switchboard.webIndex.maxURLinDHTOutCache());
prop.putNum("maxURLinKCache", switchboard.webIndex.maxURLinDHTInCache());
prop.putNum("maxAgeOfWCache", switchboard.webIndex.maxAgeOfDHTOutCache() / 1000 / 60); // minutes
prop.putNum("maxAgeOfKCache", switchboard.webIndex.maxAgeOfDHTInCache() / 1000 / 60); // minutes
prop.putNum("minAgeOfWCache", switchboard.webIndex.minAgeOfDHTOutCache() / 1000 / 60); // minutes
prop.putNum("minAgeOfKCache", switchboard.webIndex.minAgeOfDHTInCache() / 1000 / 60); // minutes
prop.putNum("wordCacheSize", switchboard.webIndex.dhtCacheSize());
prop.putNum("wordCacheSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes()/1024);
prop.putNum("maxURLinCache", switchboard.webIndex.maxURLinDHTCache());
prop.putNum("maxAgeOfCache", switchboard.webIndex.maxAgeOfDHTCache() / 1000 / 60); // minutes
prop.putNum("minAgeOfCache", switchboard.webIndex.minAgeOfDHTCache() / 1000 / 60); // minutes
prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));

@@ -24,26 +24,14 @@
</Task>
#{/table}#</Tasks>
<Cache>
<dhtOut>
<urlCacheSize>#[urlCacheSize]#</urlCacheSize>
<wordCacheSize>#[wordCacheWSize]#</wordCacheSize>
<maxURLinCache>#[maxURLinWCache]#</maxURLinCache>
<maxAgeOfCache>#[maxAgeOfWCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfWCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</dhtOut>
<dhtIn>
<urlCacheSize>#[urlCacheSize]#</urlCacheSize>
<wordCacheSize>#[wordCacheKSize]#</wordCacheSize>
<maxURLinCache>#[maxURLinKCache]#</maxURLinCache>
<maxAgeOfCache>#[maxAgeOfKCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfKCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordInCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</dhtIn>
<urlCacheSize>#[urlCacheSize]#</urlCacheSize>
<wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<maxURLinCache>#[maxURLinCache]#</maxURLinCache>
<maxAgeOfCache>#[maxAgeOfCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</Cache>
<ThreadPools>
#{pool}#<Pool>
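Note that this is a breaking change for any client parsing this XML: the per-direction <dhtOut> and <dhtIn> wrappers are gone, and the cache fields now sit directly under <Cache>.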

@@ -36,7 +36,6 @@ import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import api.queues_p;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.SitemapImporter;
@@ -68,7 +67,23 @@ public class WatchCrawler_p {
// return variable that accumulates replacements
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
// initial values for AJAX elements (without JavaScript)
final serverObjects prop = queues_p.respond(header, post, env);
final serverObjects prop = new serverObjects();
prop.put("rejected", 0);
prop.put("indexingSize", 0);
prop.put("indexingMax", 0);
prop.put("urlpublictextSize", 0);
prop.put("rwipublictextSize", 0);
prop.put("list", "0");
prop.put("loaderSize", 0);
prop.put("loaderMax", 0);
prop.put("list-loader", 0);
prop.put("localCrawlSize", 0);
prop.put("localCrawlState", "");
prop.put("limitCrawlSize", 0);
prop.put("limitCrawlState", "");
prop.put("remoteCrawlSize", 0);
prop.put("remoteCrawlState", "");
prop.put("list-remote", 0);
prop.put("forwardToCrawlStart", "0");
prop.put("info", "0");
@@ -429,7 +444,7 @@ public class WatchCrawler_p {
// return rewrite properties
return prop;
}
private static long recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) {
if (!recrawlIfOlderCheck) return 0L;
if (crawlingIfOlderUnit.equals("year")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 365L;

@@ -1,6 +1,3 @@
package api;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;

@@ -21,15 +21,14 @@ public class status_p {
prop.setLocalized(false);
prop.put("rejected", "0");
sb.updateMySeed();
final int cacheOutSize = sb.webIndex.dhtOutCacheSize();
final int cacheSize = sb.webIndex.dhtCacheSize();
final long cacheMaxSize = sb.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 10000);
prop.putNum("ppm", sb.currentPPM());
prop.putNum("qpm", sb.webIndex.seedDB.mySeed().getQPM());
prop.putNum("wordCacheSize", sb.webIndex.dhtOutCacheSize() + sb.webIndex.dhtInCacheSize());
prop.putNum("wordCacheWSize", cacheOutSize);
prop.putNum("wordCacheKSize", sb.webIndex.dhtInCacheSize());
prop.putNum("wordCacheSize", sb.webIndex.dhtCacheSize());
prop.putNum("wordCacheSize", cacheSize);
prop.putNum("wordCacheMaxSize", cacheMaxSize);
prop.put("wordCacheWCount", cacheOutSize);
prop.put("wordCacheCount", cacheSize);
prop.put("wordCacheMaxCount", cacheMaxSize);
//

@@ -2,10 +2,8 @@
<status>
<ppm>#[ppm]#</ppm>
<wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<wordCacheWSize>#[wordCacheWSize]#</wordCacheWSize>
<wordCacheWCount>#[wordCacheWCount]#</wordCacheWCount>
<wordCacheKSize>#[wordCacheKSize]#</wordCacheKSize>
<wordCacheMaxSize>#[wordCacheMaxSize]#</wordCacheMaxSize>
<wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<wordCacheCount>#[wordCacheCount]#</wordCacheCount>
<wordCacheMaxCount>#[wordCacheMaxCount]#</wordCacheMaxCount>
<memory>

@@ -91,8 +91,8 @@ function handleStatus(){
removeAllChildren(trafCrawlerSpan);
trafCrawlerSpan.appendChild(document.createTextNode(Math.round((trafficCrawler) / 1024 / 10.24) / 100));
var wordCache=getValue(getFirstChild(statusTag, "wordCacheWCount"));
var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheWSize"));
var wordCache=getValue(getFirstChild(statusTag, "wordCacheCount"));
var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheSize"));
var wordCacheMax=getValue(getFirstChild(statusTag, "wordCacheMaxCount"));
var wordCacheMaxSize=getValue(getFirstChild(statusTag, "wordCacheMaxSize"));

@@ -100,9 +100,9 @@ public final class transferRWI {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted.");
result = "not_granted";
pause = 0;
} else if (checkLimit && sb.webIndex.dhtInCacheSize() > cachelimit) {
} else if (checkLimit && sb.webIndex.dhtCacheSize() > cachelimit) {
// we are too busy to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.dhtInCacheSize() + ").");
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.dhtCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 60000;
@@ -163,7 +163,7 @@
}
// learn entry
sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true);
sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis());
serverCore.checkInterruption();
// check if we need to ask for the corresponding URL
@@ -200,7 +200,7 @@
result = "ok";
if (checkLimit) {
pause = (sb.webIndex.dhtInCacheSize() < 500) ? 0 : sb.webIndex.dhtInCacheSize(); // estimation of necessary pause time
pause = (sb.webIndex.dhtCacheSize() < 500) ? 0 : sb.webIndex.dhtCacheSize(); // estimation of necessary pause time
}
}
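Taken together, the transferRWI hunks implement simple back-pressure against the unified cache; a condensed sketch (cachelimit and the 500-entry threshold as shown above):

final int cacheSize = sb.webIndex.dhtCacheSize();
if (checkLimit && cacheSize > cachelimit) {
    // too busy: reject the transfer and ask the sending peer to wait a full minute
    result = "busy";
    pause = 60000;
} else {
    // accept; the requested pause grows linearly with the cache fill level
    result = "ok";
    if (checkLimit) pause = (cacheSize < 500) ? 0 : cacheSize; // milliseconds
}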

@@ -48,7 +48,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
public int cacheReferenceCountLimit; // the maximum number of references to a single RWI entity
public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity
private final serverLog log;
private final File oldDumpFile, newDumpFile;
private final File dumpFile;
private indexContainerRAMHeap heap;
@SuppressWarnings("unchecked")
@@ -58,7 +58,6 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
final int entityCacheMaxSize,
final int wCacheReferenceCountLimitInit,
final long wCacheReferenceAgeLimitInit,
final String oldHeapName,
final String newHeapName,
final serverLog log) {
@@ -71,24 +70,13 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
this.log = log;
this.oldDumpFile = new File(databaseRoot, oldHeapName);
this.newDumpFile = new File(databaseRoot, newHeapName);
this.dumpFile = new File(databaseRoot, newHeapName);
this.heap = new indexContainerRAMHeap(payloadrow);
// read in dump of last session
boolean initFailed = false;
if (newDumpFile.exists() && oldDumpFile.exists()) {
// we need only one, delete the old
oldDumpFile.delete();
}
if (oldDumpFile.exists()) try {
heap.initWriteModeFromHeap(oldDumpFile);
} catch (IOException e) {
initFailed = true;
e.printStackTrace();
}
if (newDumpFile.exists()) try {
heap.initWriteModeFromBLOB(newDumpFile);
if (dumpFile.exists()) try {
heap.initWriteModeFromBLOB(dumpFile);
} catch (IOException e) {
initFailed = true;
e.printStackTrace();
@@ -97,7 +85,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
log.logSevere("unable to restore cache dump");
// get empty dump
heap.initWriteMode();
} else if (oldDumpFile.exists() || newDumpFile.exists()) {
} else if (dumpFile.exists()) {
// initialize scores for cache organization
for (final indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {
this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote()));
@@ -327,7 +315,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
// dump cache
try {
//heap.dumpold(this.oldDumpFile);
heap.dump(this.newDumpFile);
heap.dump(this.dumpFile);
} catch (final IOException e){
log.logSevere("unable to dump cache: " + e.getMessage(), e);
}
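With the old heap-format fallback removed, the indexRAMRI cache lifecycle reduces to a single BLOB dump file; a condensed sketch of the resulting startup and shutdown path (method names as in the hunks above):

// startup: restore the cache from the single BLOB dump, if present
if (dumpFile.exists()) {
    heap.initWriteModeFromBLOB(dumpFile);
} else {
    heap.initWriteMode(); // start with an empty cache
}
// shutdown: write the cache back to the same file
heap.dump(dumpFile);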
@@ -336,8 +324,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
hashDate.clear();
}
public Iterator iterator() {
// TODO Auto-generated method stub
return null;
public Iterator<indexContainer> iterator() {
return wordContainers(null, false);
}
}
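The previously stubbed iterator() (it returned null) now delegates to wordContainers(null, false); this is what makes the for-each migration loop in the plasmaWordIndex hunk below possible:

// iterate all containers currently held in a RAM cache
for (final indexContainer c : dhtInCache) {
    dhtCache.addEntries(c);
}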

@@ -60,7 +60,7 @@ public class kelondroBufferedEcoFS {
}
public synchronized long size() throws IOException {
return efs.size();
return efs == null ? 0 : efs.size();
}
public File filename() {

@@ -69,7 +69,6 @@ import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyPeerActions;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyURL;
@@ -99,7 +98,7 @@ public final class plasmaWordIndex implements indexRI {
private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder;
private final indexRAMRI dhtOutCache, dhtInCache;
private final indexRAMRI dhtCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private final serverLog log;
private indexRepositoryReference referenceURL;
@@ -141,23 +140,21 @@ public final class plasmaWordIndex implements indexRI {
}
}
}
/*
*
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
if (new File(textindexcache, "index.dhtout.blob").exists()) {
this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log);
indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log);
indexContainer c1;
if (new File(textindexcache, "index.dhtin.blob").exists()) {
// migration of both caches into one
this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
for (indexContainer c: dhtInCache) {
this.dhtCache.addEntries(c);
}
new File(textindexcache, "index.dhtin.blob").delete();
} else {
// read in new BLOB
this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
}
*/
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
this.dhtOutCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log);
this.dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log);
// create collections storage path
final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
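The hunk above is hard to follow without add/remove markers; read from the lines shown, the new one-time migration path appears to be the following (a reconstruction of this diff, not verified against the repository):

final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
if (new File(textindexcache, "index.dhtin.blob").exists()) {
    // one-time migration: fold the old DHT-in cache into the unified cache
    this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
    final indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
    for (indexContainer c : dhtInCache) {
        this.dhtCache.addEntries(c);
    }
    new File(textindexcache, "index.dhtin.blob").delete();
} else {
    // no old in-cache on disk: just open the unified BLOB
    this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
}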
@@ -247,8 +244,7 @@
}
public void clear() {
dhtInCache.clear();
dhtOutCache.clear();
dhtCache.clear();
collections.clear();
try {
referenceURL.clear();
@@ -421,46 +417,30 @@
}
public int minMem() {
return 1024*1024 /* indexing overhead */ + dhtOutCache.minMem() + dhtInCache.minMem() + collections.minMem();
return 1024*1024 /* indexing overhead */ + dhtCache.minMem() + collections.minMem();
}
public int maxURLinDHTOutCache() {
return dhtOutCache.maxURLinCache();
public int maxURLinDHTCache() {
return dhtCache.maxURLinCache();
}
public long minAgeOfDHTOutCache() {
return dhtOutCache.minAgeOfCache();
public long minAgeOfDHTCache() {
return dhtCache.minAgeOfCache();
}
public long maxAgeOfDHTOutCache() {
return dhtOutCache.maxAgeOfCache();
public long maxAgeOfDHTCache() {
return dhtCache.maxAgeOfCache();
}
public int maxURLinDHTInCache() {
return dhtInCache.maxURLinCache();
}
public long minAgeOfDHTInCache() {
return dhtInCache.minAgeOfCache();
}
public long maxAgeOfDHTInCache() {
return dhtInCache.maxAgeOfCache();
}
public int dhtOutCacheSize() {
return dhtOutCache.size();
}
public int dhtInCacheSize() {
return dhtInCache.size();
public int dhtCacheSize() {
return dhtCache.size();
}
public long dhtCacheSizeBytes(final boolean in) {
public long dhtCacheSizeBytes() {
// calculate the real size in bytes of the DHT cache
long cacheBytes = 0;
final long entryBytes = indexRWIRowEntry.urlEntryRow.objectsize;
final indexRAMRI cache = (in ? dhtInCache : dhtOutCache);
final indexRAMRI cache = dhtCache;
synchronized (cache) {
final Iterator<indexContainer> it = cache.wordContainers(null, false);
while (it.hasNext()) cacheBytes += it.next().size() * entryBytes;
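dhtCacheSizeBytes() estimates memory use as reference count times the fixed row size; a worked example (the row size is illustrative, not taken from the source):

// if urlEntryRow.objectsize were 50 bytes and the cache held
// 20,000 references across all containers:
//   cacheBytes = 20000 * 50 = 1,000,000 bytes
// PerformanceQueues_p then reports cacheBytes / 1024 ≈ 976 KB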
@@ -469,8 +449,7 @@ public final class plasmaWordIndex implements indexRI {
}
public void setMaxWordCount(final int maxWords) {
dhtOutCache.setMaxWordCount(maxWords);
dhtInCache.setMaxWordCount(maxWords);
dhtCache.setMaxWordCount(maxWords);
}
public void dhtFlushControl(final indexRAMRI theCache) {
@@ -500,38 +479,18 @@
return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount);
}
public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime, boolean dhtInCase) {
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, wordHash, this.netRedundancy))) dhtInCase = true;
public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime) {
// add the entry
if (dhtInCase) {
dhtInCache.addEntry(wordHash, entry, updateTime, true);
dhtFlushControl(this.dhtInCache);
} else {
dhtOutCache.addEntry(wordHash, entry, updateTime, false);
dhtFlushControl(this.dhtOutCache);
}
dhtCache.addEntry(wordHash, entry, updateTime, true);
dhtFlushControl(this.dhtCache);
}
public void addEntries(final indexContainer entries) {
addEntries(entries, false);
}
public void addEntries(final indexContainer entries, boolean dhtInCase) {
assert (entries.row().objectsize == indexRWIRowEntry.urlEntryRow.objectsize);
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, entries.getWordHash(), this.netRedundancy))) dhtInCase = true;
// add the entry
if (dhtInCase) {
dhtInCache.addEntries(entries);
dhtFlushControl(this.dhtInCache);
} else {
dhtOutCache.addEntries(entries);
dhtFlushControl(this.dhtOutCache);
}
dhtCache.addEntries(entries);
dhtFlushControl(this.dhtCache);
}
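Callers lose the dhtInCase flag, and with it the per-word routing via yacySeed.shallBeOwnWord. The matching caller update from the yacyClient hunk at the end of this diff:

// before
wordIndex.addEntries(container[m], true);
// after
wordIndex.addEntries(container[m]);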
public void flushCacheFor(int time) {
@@ -539,19 +498,9 @@
}
private synchronized void flushCacheUntil(long timeout) {
long timeout0 = System.currentTimeMillis() + (timeout - System.currentTimeMillis()) / 10 * 6;
// we give 60% for dhtIn to prefer filling of cache with dht transmission
//int cIn = 0;
while (System.currentTimeMillis() < timeout0 && dhtInCache.size() > 0) {
flushCacheOne(dhtInCache);
//cIn++;
while (System.currentTimeMillis() < timeout && dhtCache.size() > 0) {
flushCacheOne(dhtCache);
}
//int cOut = 0;
while (System.currentTimeMillis() < timeout && dhtOutCache.size() > 0) {
flushCacheOne(dhtOutCache);
//cOut++;
}
//System.out.println("*** DEBUG cache flush: cIn = " + cIn + ", cOut = " + cOut);
}
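The removed timeout0 line split the flush budget 60/40 in favour of dhtInCache; with one cache the whole window is used. A worked example of the old split (times are illustrative):

// old: with 10s left until the absolute deadline `timeout`:
//   timeout0 = now + (10s / 10) * 6 = now + 6s  -> up to 6s for dhtInCache,
//   then whatever remains of the 10s for dhtOutCache
// new: dhtCache may use the full 10s window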
private synchronized void flushCacheOne(final indexRAMRI ram) {
@@ -615,7 +564,7 @@
doctype,
outlinksSame, outlinksOther,
wprop.flags);
addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis(), false);
addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis());
wordCount++;
}
@@ -623,8 +572,7 @@
}
public boolean hasContainer(final String wordHash) {
if (dhtOutCache.hasContainer(wordHash)) return true;
if (dhtInCache.hasContainer(wordHash)) return true;
if (dhtCache.hasContainer(wordHash)) return true;
if (collections.hasContainer(wordHash)) return true;
return false;
}
@@ -637,12 +585,7 @@
// get from cache
indexContainer container;
container = dhtOutCache.getContainer(wordHash, urlselection);
if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection);
} else {
container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection));
}
container = dhtCache.getContainer(wordHash, urlselection);
// get from collection index
if (container == null) {
@@ -727,7 +670,7 @@
}
public int size() {
return java.lang.Math.max(collections.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()));
return java.lang.Math.max(collections.size(), dhtCache.size());
}
public int collectionsSize() {
@@ -735,12 +678,11 @@
}
public int cacheSize() {
return dhtInCache.size() + dhtOutCache.size();
return dhtCache.size();
}
public void close() {
dhtInCache.close();
dhtOutCache.close();
dhtCache.close();
collections.close();
referenceURL.close();
seedDB.close();
@@ -754,18 +696,15 @@
final indexContainer c = new indexContainer(
wordHash,
indexRWIRowEntry.urlEntryRow,
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash)
);
c.addAllUnique(dhtInCache.deleteContainer(wordHash));
c.addAllUnique(dhtOutCache.deleteContainer(wordHash));
dhtCache.sizeContainer(wordHash));
c.addAllUnique(dhtCache.deleteContainer(wordHash));
c.addAllUnique(collections.deleteContainer(wordHash));
return c;
}
public boolean removeEntry(final String wordHash, final String urlHash) {
boolean removed = false;
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash));
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash));
removed = removed | (dhtCache.removeEntry(wordHash, urlHash));
removed = removed | (collections.removeEntry(wordHash, urlHash));
return removed;
}
@@ -783,16 +722,14 @@
public int removeEntries(final String wordHash, final Set<String> urlHashes) {
int removed = 0;
removed += dhtInCache.removeEntries(wordHash, urlHashes);
removed += dhtOutCache.removeEntries(wordHash, urlHashes);
removed += dhtCache.removeEntries(wordHash, urlHashes);
removed += collections.removeEntries(wordHash, urlHashes);
return removed;
}
public String removeEntriesExpl(final String wordHash, final Set<String> urlHashes) {
String removed = "";
removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", ";
removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", ";
removed += dhtCache.removeEntries(wordHash, urlHashes) + ", ";
removed += collections.removeEntries(wordHash, urlHashes);
return removed;
}
@@ -825,7 +762,7 @@
containerOrder.rotate(emptyContainer(startHash, 0));
final TreeSet<indexContainer> containers = new TreeSet<indexContainer>(containerOrder);
final Iterator<indexContainer> i = wordContainers(startHash, ram, rot);
if (ram) count = Math.min(dhtOutCache.size(), count);
if (ram) count = Math.min(dhtCache.size(), count);
indexContainer container;
// this loop does not terminate using the i.hasNext() predicate when rot == true
// because then the underlying iterator is a rotating iterator without termination
@@ -958,7 +895,7 @@
public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startHash, final boolean ram, final boolean rot) {
final kelondroCloneableIterator<indexContainer> i = wordContainers(startHash, ram);
if (rot) {
return new kelondroRotateIterator<indexContainer>(i, new String(kelondroBase64Order.zero(startHash.length())), dhtOutCache.size() + ((ram) ? 0 : collections.size()));
return new kelondroRotateIterator<indexContainer>(i, new String(kelondroBase64Order.zero(startHash.length())), dhtCache.size() + ((ram) ? 0 : collections.size()));
}
return i;
}
@@ -967,10 +904,10 @@
final kelondroOrder<indexContainer> containerOrder = new indexContainerOrder(indexOrder.clone());
containerOrder.rotate(emptyContainer(startWordHash, 0));
if (ram) {
return dhtOutCache.wordContainers(startWordHash, false);
return dhtCache.wordContainers(startWordHash, false);
}
return new kelondroMergeIterator<indexContainer>(
dhtOutCache.wordContainers(startWordHash, false),
dhtCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
containerOrder,
indexContainer.containerMergeMethod,

@@ -639,7 +639,7 @@ public final class yacyClient {
// insert the containers to the index
for (int m = 0; m < words; m++) {
wordIndex.addEntries(container[m], true);
wordIndex.addEntries(container[m]);
}
// generate statistics
