Removed distinction between DHT-in and DHT-out. This is necessary to make room for the new cell data structure, which cannot use this distinction in the first place, but will enable the same meaning with different mechanisms (segments, later).

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5511 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 34da04c7dd
commit dedfc7df7f

@ -68,14 +68,13 @@
<fieldset><legend>Cache Settings:</legend> <fieldset><legend>Cache Settings:</legend>
<table border="0" cellpadding="5" cellspacing="1"> <table border="0" cellpadding="5" cellspacing="1">
<tr valign="top" class="TableHeader"> <tr valign="top" class="TableHeader">
<td>Cache Type</td> <td></td>
<td>DHT-Out</td> <td>RAM Cache</td>
<td>DHT-In</td>
<td>Description</td> <td>Description</td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>URLs in RAM buffer:</td> <td>URLs in RAM buffer:</td>
<td colspan="2" align="center">#[urlCacheSize]#</td> <td align="center">#[urlCacheSize]#</td>
<td> <td>
This is the size of the URL write buffer. Its purpose is to buffer incoming URLs This is the size of the URL write buffer. Its purpose is to buffer incoming URLs
in case of search result transmission and during DHT transfer. in case of search result transmission and during DHT transfer.
@ -83,8 +82,7 @@
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Words in RAM cache:<br />(Size in KBytes)</td> <td>Words in RAM cache:<br />(Size in KBytes)</td>
<td>#[wordCacheWSize]#<br />(#[wordCacheWSizeKBytes]# KB)</td> <td>#[wordCacheSize]#<br />(#[wordCacheSizeKBytes]# KB)</td>
<td>#[wordCacheKSize]#<br />(#[wordCacheKSizeKBytes]# KB)</td>
<td> <td>
This is the current size of the word caches. This is the current size of the word caches.
The indexing cache speeds up the indexing process, the DHT cache holds indexes temporary for approval. The indexing cache speeds up the indexing process, the DHT cache holds indexes temporary for approval.
@ -93,8 +91,7 @@
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Maximum URLs currently assigned<br />to one cached word:</td> <td>Maximum URLs currently assigned<br />to one cached word:</td>
<td>#[maxURLinWCache]#</td> <td>#[maxURLinCache]#</td>
<td>#[maxURLinKCache]#</td>
<td> <td>
This is the maximum size of URLs assigned to a single word cache entry. This is the maximum size of URLs assigned to a single word cache entry.
If this is a big number, it shows that the caching works efficiently. If this is a big number, it shows that the caching works efficiently.
@ -102,23 +99,21 @@
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Maximum age of a word:</td> <td>Maximum age of a word:</td>
<td>#[maxAgeOfWCache]#</td> <td>#[maxAgeOfCache]#</td>
<td>#[maxAgeOfKCache]#</td>
<td> <td>
This is the maximum age of a word in an index in minutes. This is the maximum age of a word in an index in minutes.
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Minimum age of a word:</td> <td>Minimum age of a word:</td>
<td>#[minAgeOfWCache]#</td> <td>#[minAgeOfCache]#</td>
<td>#[minAgeOfKCache]#</td>
<td> <td>
This is the minimum age of a word in an index in minutes. This is the minimum age of a word in an index in minutes.
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Maximum number of words in cache:</td> <td>Maximum number of words in cache:</td>
<td colspan="2"> <td>
<input name="wordCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordCacheMaxCount]#" /> <input name="wordCacheMaxCount" type="text" size="10" maxlength="100" value="#[wordCacheMaxCount]#" />
</td> </td>
<td> <td>
@ -129,7 +124,7 @@
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Initial space of words in cache:</td> <td>Initial space of words in cache:</td>
<td colspan="2"> <td>
<input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" /> <input name="wordCacheInitCount" type="text" size="10" maxlength="100" value="#[wordCacheInitCount]#" />
</td> </td>
<td> <td>

@ -286,16 +286,11 @@ public class PerformanceQueues_p {
// table cache settings // table cache settings
prop.putNum("urlCacheSize", switchboard.webIndex.getURLwriteCacheSize()); prop.putNum("urlCacheSize", switchboard.webIndex.getURLwriteCacheSize());
prop.putNum("wordCacheWSize", switchboard.webIndex.dhtOutCacheSize()); prop.putNum("wordCacheSize", switchboard.webIndex.dhtCacheSize());
prop.putNum("wordCacheKSize", switchboard.webIndex.dhtInCacheSize()); prop.putNum("wordCacheSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes()/1024);
prop.putNum("wordCacheWSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(false)/1024); prop.putNum("maxURLinCache", switchboard.webIndex.maxURLinDHTCache());
prop.putNum("wordCacheKSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(true)/1024); prop.putNum("maxAgeOfCache", switchboard.webIndex.maxAgeOfDHTCache() / 1000 / 60); // minutes
prop.putNum("maxURLinWCache", switchboard.webIndex.maxURLinDHTOutCache()); prop.putNum("minAgeOfCache", switchboard.webIndex.minAgeOfDHTCache() / 1000 / 60); // minutes
prop.putNum("maxURLinKCache", switchboard.webIndex.maxURLinDHTInCache());
prop.putNum("maxAgeOfWCache", switchboard.webIndex.maxAgeOfDHTOutCache() / 1000 / 60); // minutes
prop.putNum("maxAgeOfKCache", switchboard.webIndex.maxAgeOfDHTInCache() / 1000 / 60); // minutes
prop.putNum("minAgeOfWCache", switchboard.webIndex.minAgeOfDHTOutCache() / 1000 / 60); // minutes
prop.putNum("minAgeOfKCache", switchboard.webIndex.minAgeOfDHTInCache() / 1000 / 60); // minutes
prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180)); prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180));
prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000)); prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000)); prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000));

@ -24,26 +24,14 @@
</Task> </Task>
#{/table}#</Tasks> #{/table}#</Tasks>
<Cache> <Cache>
<dhtOut> <urlCacheSize>#[urlCacheSize]#</urlCacheSize>
<urlCacheSize>#[urlCacheSize]#</urlCacheSize> <wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<wordCacheSize>#[wordCacheWSize]#</wordCacheSize> <maxURLinCache>#[maxURLinCache]#</maxURLinCache>
<maxURLinCache>#[maxURLinWCache]#</maxURLinCache> <maxAgeOfCache>#[maxAgeOfCache]#</maxAgeOfCache>
<maxAgeOfCache>#[maxAgeOfWCache]#</maxAgeOfCache> <minAgeOfCache>#[minAgeOfCache]#</minAgeOfCache>
<minAgeOfCache>#[minAgeOfWCache]#</minAgeOfCache> <wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount>
<wordCacheMaxCount>#[wordOutCacheMaxCount]#</wordCacheMaxCount> <wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount> <wordFlushSize>#[wordFlushSize]#</wordFlushSize>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</dhtOut>
<dhtIn>
<urlCacheSize>#[urlCacheSize]#</urlCacheSize>
<wordCacheSize>#[wordCacheKSize]#</wordCacheSize>
<maxURLinCache>#[maxURLinKCache]#</maxURLinCache>
<maxAgeOfCache>#[maxAgeOfKCache]#</maxAgeOfCache>
<minAgeOfCache>#[minAgeOfKCache]#</minAgeOfCache>
<wordCacheMaxCount>#[wordInCacheMaxCount]#</wordCacheMaxCount>
<wordCacheInitCount>#[wordCacheInitCount]#</wordCacheInitCount>
<wordFlushSize>#[wordFlushSize]#</wordFlushSize>
</dhtIn>
</Cache> </Cache>
<ThreadPools> <ThreadPools>
#{pool}#<Pool> #{pool}#<Pool>

@ -36,7 +36,6 @@ import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
import api.queues_p;
import de.anomic.crawler.CrawlEntry; import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.SitemapImporter; import de.anomic.crawler.SitemapImporter;
@ -68,7 +67,23 @@ public class WatchCrawler_p {
// return variable that accumulates replacements // return variable that accumulates replacements
final plasmaSwitchboard sb = (plasmaSwitchboard) env; final plasmaSwitchboard sb = (plasmaSwitchboard) env;
// inital values for AJAX Elements (without JavaScript) // inital values for AJAX Elements (without JavaScript)
final serverObjects prop = queues_p.respond(header, post, env); final serverObjects prop = new serverObjects();
prop.put("rejected", 0);
prop.put("indexingSize", 0);
prop.put("indexingMax", 0);
prop.put("urlpublictextSize", 0);
prop.put("rwipublictextSize", 0);
prop.put("list", "0");
prop.put("loaderSize", 0);
prop.put("loaderMax", 0);
prop.put("list-loader", 0);
prop.put("localCrawlSize", 0);
prop.put("localCrawlState", "");
prop.put("limitCrawlSize", 0);
prop.put("limitCrawlState", "");
prop.put("remoteCrawlSize", 0);
prop.put("remoteCrawlState", "");
prop.put("list-remote", 0);
prop.put("forwardToCrawlStart", "0"); prop.put("forwardToCrawlStart", "0");
prop.put("info", "0"); prop.put("info", "0");
@ -429,7 +444,7 @@ public class WatchCrawler_p {
// return rewrite properties // return rewrite properties
return prop; return prop;
} }
private static long recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) { private static long recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) {
if (!recrawlIfOlderCheck) return 0L; if (!recrawlIfOlderCheck) return 0L;
if (crawlingIfOlderUnit.equals("year")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 365L; if (crawlingIfOlderUnit.equals("year")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 365L;

@ -1,6 +1,3 @@
package api;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;

@ -21,15 +21,14 @@ public class status_p {
prop.setLocalized(false); prop.setLocalized(false);
prop.put("rejected", "0"); prop.put("rejected", "0");
sb.updateMySeed(); sb.updateMySeed();
final int cacheOutSize = sb.webIndex.dhtOutCacheSize(); final int cacheSize = sb.webIndex.dhtCacheSize();
final long cacheMaxSize = sb.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 10000); final long cacheMaxSize = sb.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 10000);
prop.putNum("ppm", sb.currentPPM()); prop.putNum("ppm", sb.currentPPM());
prop.putNum("qpm", sb.webIndex.seedDB.mySeed().getQPM()); prop.putNum("qpm", sb.webIndex.seedDB.mySeed().getQPM());
prop.putNum("wordCacheSize", sb.webIndex.dhtOutCacheSize() + sb.webIndex.dhtInCacheSize()); prop.putNum("wordCacheSize", sb.webIndex.dhtCacheSize());
prop.putNum("wordCacheWSize", cacheOutSize); prop.putNum("wordCacheSize", cacheSize);
prop.putNum("wordCacheKSize", sb.webIndex.dhtInCacheSize());
prop.putNum("wordCacheMaxSize", cacheMaxSize); prop.putNum("wordCacheMaxSize", cacheMaxSize);
prop.put("wordCacheWCount", cacheOutSize); prop.put("wordCacheCount", cacheSize);
prop.put("wordCacheMaxCount", cacheMaxSize); prop.put("wordCacheMaxCount", cacheMaxSize);
// //

@ -2,10 +2,8 @@
<status> <status>
<ppm>#[ppm]#</ppm> <ppm>#[ppm]#</ppm>
<wordCacheSize>#[wordCacheSize]#</wordCacheSize> <wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<wordCacheWSize>#[wordCacheWSize]#</wordCacheWSize> <wordCacheSize>#[wordCacheSize]#</wordCacheSize>
<wordCacheWCount>#[wordCacheWCount]#</wordCacheWCount> <wordCacheCount>#[wordCacheCount]#</wordCacheCount>
<wordCacheKSize>#[wordCacheKSize]#</wordCacheKSize>
<wordCacheMaxSize>#[wordCacheMaxSize]#</wordCacheMaxSize>
<wordCacheMaxCount>#[wordCacheMaxCount]#</wordCacheMaxCount> <wordCacheMaxCount>#[wordCacheMaxCount]#</wordCacheMaxCount>
<memory> <memory>

@ -91,8 +91,8 @@ function handleStatus(){
removeAllChildren(trafCrawlerSpan); removeAllChildren(trafCrawlerSpan);
trafCrawlerSpan.appendChild(document.createTextNode(Math.round((trafficCrawler) / 1024 / 10.24) / 100)); trafCrawlerSpan.appendChild(document.createTextNode(Math.round((trafficCrawler) / 1024 / 10.24) / 100));
var wordCache=getValue(getFirstChild(statusTag, "wordCacheWCount")); var wordCache=getValue(getFirstChild(statusTag, "wordCacheCount"));
var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheWSize")); var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheSize"));
var wordCacheMax=getValue(getFirstChild(statusTag, "wordCacheMaxCount")); var wordCacheMax=getValue(getFirstChild(statusTag, "wordCacheMaxCount"));
var wordCacheMaxSize=getValue(getFirstChild(statusTag, "wordCacheMaxSize")); var wordCacheMaxSize=getValue(getFirstChild(statusTag, "wordCacheMaxSize"));

@ -100,9 +100,9 @@ public final class transferRWI {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted."); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted.");
result = "not_granted"; result = "not_granted";
pause = 0; pause = 0;
} else if (checkLimit && sb.webIndex.dhtInCacheSize() > cachelimit) { } else if (checkLimit && sb.webIndex.dhtCacheSize() > cachelimit) {
// we are too busy to receive indexes // we are too busy to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.dhtInCacheSize() + ")."); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.dhtCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush granted = false; // don't accept more words if there are too many words to flush
result = "busy"; result = "busy";
pause = 60000; pause = 60000;
@ -163,7 +163,7 @@ public final class transferRWI {
} }
// learn entry // learn entry
sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true); sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis());
serverCore.checkInterruption(); serverCore.checkInterruption();
// check if we need to ask for the corresponding URL // check if we need to ask for the corresponding URL
@ -200,7 +200,7 @@ public final class transferRWI {
result = "ok"; result = "ok";
if (checkLimit) { if (checkLimit) {
pause = (sb.webIndex.dhtInCacheSize() < 500) ? 0 : sb.webIndex.dhtInCacheSize(); // estimation of necessary pause time pause = (sb.webIndex.dhtCacheSize() < 500) ? 0 : sb.webIndex.dhtCacheSize(); // estimation of necessary pause time
} }
} }

@ -48,7 +48,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
public int cacheReferenceCountLimit; // the maximum number of references to a single RWI entity public int cacheReferenceCountLimit; // the maximum number of references to a single RWI entity
public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity
private final serverLog log; private final serverLog log;
private final File oldDumpFile, newDumpFile; private final File dumpFile;
private indexContainerRAMHeap heap; private indexContainerRAMHeap heap;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -58,7 +58,6 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
final int entityCacheMaxSize, final int entityCacheMaxSize,
final int wCacheReferenceCountLimitInit, final int wCacheReferenceCountLimitInit,
final long wCacheReferenceAgeLimitInit, final long wCacheReferenceAgeLimitInit,
final String oldHeapName,
final String newHeapName, final String newHeapName,
final serverLog log) { final serverLog log) {
@ -71,24 +70,13 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit; this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit; this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
this.log = log; this.log = log;
this.oldDumpFile = new File(databaseRoot, oldHeapName); this.dumpFile = new File(databaseRoot, newHeapName);
this.newDumpFile = new File(databaseRoot, newHeapName);
this.heap = new indexContainerRAMHeap(payloadrow); this.heap = new indexContainerRAMHeap(payloadrow);
// read in dump of last session // read in dump of last session
boolean initFailed = false; boolean initFailed = false;
if (newDumpFile.exists() && oldDumpFile.exists()) { if (dumpFile.exists()) try {
// we need only one, delete the old heap.initWriteModeFromBLOB(dumpFile);
oldDumpFile.delete();
}
if (oldDumpFile.exists()) try {
heap.initWriteModeFromHeap(oldDumpFile);
} catch (IOException e) {
initFailed = true;
e.printStackTrace();
}
if (newDumpFile.exists()) try {
heap.initWriteModeFromBLOB(newDumpFile);
} catch (IOException e) { } catch (IOException e) {
initFailed = true; initFailed = true;
e.printStackTrace(); e.printStackTrace();
@ -97,7 +85,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
log.logSevere("unable to restore cache dump"); log.logSevere("unable to restore cache dump");
// get empty dump // get empty dump
heap.initWriteMode(); heap.initWriteMode();
} else if (oldDumpFile.exists() || newDumpFile.exists()) { } else if (dumpFile.exists()) {
// initialize scores for cache organization // initialize scores for cache organization
for (final indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) { for (final indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {
this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote())); this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote()));
@ -327,7 +315,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
// dump cache // dump cache
try { try {
//heap.dumpold(this.oldDumpFile); //heap.dumpold(this.oldDumpFile);
heap.dump(this.newDumpFile); heap.dump(this.dumpFile);
} catch (final IOException e){ } catch (final IOException e){
log.logSevere("unable to dump cache: " + e.getMessage(), e); log.logSevere("unable to dump cache: " + e.getMessage(), e);
} }
@ -336,8 +324,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable<indexC
hashDate.clear(); hashDate.clear();
} }
public Iterator iterator() { public Iterator<indexContainer> iterator() {
// TODO Auto-generated method stub return wordContainers(null, false);
return null;
} }
} }

@ -60,7 +60,7 @@ public class kelondroBufferedEcoFS {
} }
public synchronized long size() throws IOException { public synchronized long size() throws IOException {
return efs.size(); return efs == null ? 0 : efs.size();
} }
public File filename() { public File filename() {

@ -69,7 +69,6 @@ import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage; import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyPeerActions; import de.anomic.yacy.yacyPeerActions;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyURL; import de.anomic.yacy.yacyURL;
@ -99,7 +98,7 @@ public final class plasmaWordIndex implements indexRI {
private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder; private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder;
private final indexRAMRI dhtOutCache, dhtInCache; private final indexRAMRI dhtCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private final serverLog log; private final serverLog log;
private indexRepositoryReference referenceURL; private indexRepositoryReference referenceURL;
@ -141,23 +140,21 @@ public final class plasmaWordIndex implements indexRI {
} }
} }
} }
/*
*
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE"); final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs(); if (!(textindexcache.exists())) textindexcache.mkdirs();
if (new File(textindexcache, "index.dhtout.blob").exists()) { if (new File(textindexcache, "index.dhtin.blob").exists()) {
this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log); // migration of the both caches into one
indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log); this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
indexContainer c1; indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
for (indexContainer c: dhtInCache) { for (indexContainer c: dhtInCache) {
this.dhtCache.addEntries(c); this.dhtCache.addEntries(c);
} }
new File(textindexcache, "index.dhtin.blob").delete();
} else {
// read in new BLOB
this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
} }
*/
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
this.dhtOutCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log);
this.dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log);
// create collections storage path // create collections storage path
final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION"); final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
@ -247,8 +244,7 @@ public final class plasmaWordIndex implements indexRI {
} }
public void clear() { public void clear() {
dhtInCache.clear(); dhtCache.clear();
dhtOutCache.clear();
collections.clear(); collections.clear();
try { try {
referenceURL.clear(); referenceURL.clear();
@ -421,46 +417,30 @@ public final class plasmaWordIndex implements indexRI {
} }
public int minMem() { public int minMem() {
return 1024*1024 /* indexing overhead */ + dhtOutCache.minMem() + dhtInCache.minMem() + collections.minMem(); return 1024*1024 /* indexing overhead */ + dhtCache.minMem() + collections.minMem();
} }
public int maxURLinDHTOutCache() { public int maxURLinDHTCache() {
return dhtOutCache.maxURLinCache(); return dhtCache.maxURLinCache();
} }
public long minAgeOfDHTOutCache() { public long minAgeOfDHTCache() {
return dhtOutCache.minAgeOfCache(); return dhtCache.minAgeOfCache();
} }
public long maxAgeOfDHTOutCache() { public long maxAgeOfDHTCache() {
return dhtOutCache.maxAgeOfCache(); return dhtCache.maxAgeOfCache();
} }
public int maxURLinDHTInCache() { public int dhtCacheSize() {
return dhtInCache.maxURLinCache(); return dhtCache.size();
}
public long minAgeOfDHTInCache() {
return dhtInCache.minAgeOfCache();
}
public long maxAgeOfDHTInCache() {
return dhtInCache.maxAgeOfCache();
}
public int dhtOutCacheSize() {
return dhtOutCache.size();
}
public int dhtInCacheSize() {
return dhtInCache.size();
} }
public long dhtCacheSizeBytes(final boolean in) { public long dhtCacheSizeBytes() {
// calculate the real size in bytes of DHT-In/Out-Cache // calculate the real size in bytes of DHT-In/Out-Cache
long cacheBytes = 0; long cacheBytes = 0;
final long entryBytes = indexRWIRowEntry.urlEntryRow.objectsize; final long entryBytes = indexRWIRowEntry.urlEntryRow.objectsize;
final indexRAMRI cache = (in ? dhtInCache : dhtOutCache); final indexRAMRI cache = (dhtCache);
synchronized (cache) { synchronized (cache) {
final Iterator<indexContainer> it = cache.wordContainers(null, false); final Iterator<indexContainer> it = cache.wordContainers(null, false);
while (it.hasNext()) cacheBytes += it.next().size() * entryBytes; while (it.hasNext()) cacheBytes += it.next().size() * entryBytes;
@ -469,8 +449,7 @@ public final class plasmaWordIndex implements indexRI {
} }
public void setMaxWordCount(final int maxWords) { public void setMaxWordCount(final int maxWords) {
dhtOutCache.setMaxWordCount(maxWords); dhtCache.setMaxWordCount(maxWords);
dhtInCache.setMaxWordCount(maxWords);
} }
public void dhtFlushControl(final indexRAMRI theCache) { public void dhtFlushControl(final indexRAMRI theCache) {
@ -500,38 +479,18 @@ public final class plasmaWordIndex implements indexRI {
return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount); return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount);
} }
public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime, boolean dhtInCase) { public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime) {
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, wordHash, this.netRedundancy))) dhtInCase = true;
// add the entry // add the entry
if (dhtInCase) { dhtCache.addEntry(wordHash, entry, updateTime, true);
dhtInCache.addEntry(wordHash, entry, updateTime, true); dhtFlushControl(this.dhtCache);
dhtFlushControl(this.dhtInCache);
} else {
dhtOutCache.addEntry(wordHash, entry, updateTime, false);
dhtFlushControl(this.dhtOutCache);
}
} }
public void addEntries(final indexContainer entries) { public void addEntries(final indexContainer entries) {
addEntries(entries, false);
}
public void addEntries(final indexContainer entries, boolean dhtInCase) {
assert (entries.row().objectsize == indexRWIRowEntry.urlEntryRow.objectsize); assert (entries.row().objectsize == indexRWIRowEntry.urlEntryRow.objectsize);
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, entries.getWordHash(), this.netRedundancy))) dhtInCase = true;
// add the entry // add the entry
if (dhtInCase) { dhtCache.addEntries(entries);
dhtInCache.addEntries(entries); dhtFlushControl(this.dhtCache);
dhtFlushControl(this.dhtInCache);
} else {
dhtOutCache.addEntries(entries);
dhtFlushControl(this.dhtOutCache);
}
} }
public void flushCacheFor(int time) { public void flushCacheFor(int time) {
@ -539,19 +498,9 @@ public final class plasmaWordIndex implements indexRI {
} }
private synchronized void flushCacheUntil(long timeout) { private synchronized void flushCacheUntil(long timeout) {
long timeout0 = System.currentTimeMillis() + (timeout - System.currentTimeMillis()) / 10 * 6; while (System.currentTimeMillis() < timeout && dhtCache.size() > 0) {
// we give 60% for dhtIn to prefer filling of cache with dht transmission flushCacheOne(dhtCache);
//int cIn = 0;
while (System.currentTimeMillis() < timeout0 && dhtInCache.size() > 0) {
flushCacheOne(dhtInCache);
//cIn++;
} }
//int cOut = 0;
while (System.currentTimeMillis() < timeout && dhtOutCache.size() > 0) {
flushCacheOne(dhtOutCache);
//cOut++;
}
//System.out.println("*** DEBUG cache flush: cIn = " + cIn + ", cOut = " + cOut);
} }
private synchronized void flushCacheOne(final indexRAMRI ram) { private synchronized void flushCacheOne(final indexRAMRI ram) {
@ -615,7 +564,7 @@ public final class plasmaWordIndex implements indexRI {
doctype, doctype,
outlinksSame, outlinksOther, outlinksSame, outlinksOther,
wprop.flags); wprop.flags);
addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis(), false); addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis());
wordCount++; wordCount++;
} }
@ -623,8 +572,7 @@ public final class plasmaWordIndex implements indexRI {
} }
public boolean hasContainer(final String wordHash) { public boolean hasContainer(final String wordHash) {
if (dhtOutCache.hasContainer(wordHash)) return true; if (dhtCache.hasContainer(wordHash)) return true;
if (dhtInCache.hasContainer(wordHash)) return true;
if (collections.hasContainer(wordHash)) return true; if (collections.hasContainer(wordHash)) return true;
return false; return false;
} }
@ -637,12 +585,7 @@ public final class plasmaWordIndex implements indexRI {
// get from cache // get from cache
indexContainer container; indexContainer container;
container = dhtOutCache.getContainer(wordHash, urlselection); container = dhtCache.getContainer(wordHash, urlselection);
if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection);
} else {
container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection));
}
// get from collection index // get from collection index
if (container == null) { if (container == null) {
@ -727,7 +670,7 @@ public final class plasmaWordIndex implements indexRI {
} }
public int size() { public int size() {
return java.lang.Math.max(collections.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size())); return java.lang.Math.max(collections.size(), dhtCache.size());
} }
public int collectionsSize() { public int collectionsSize() {
@ -735,12 +678,11 @@ public final class plasmaWordIndex implements indexRI {
} }
public int cacheSize() { public int cacheSize() {
return dhtInCache.size() + dhtOutCache.size(); return dhtCache.size();
} }
public void close() { public void close() {
dhtInCache.close(); dhtCache.close();
dhtOutCache.close();
collections.close(); collections.close();
referenceURL.close(); referenceURL.close();
seedDB.close(); seedDB.close();
@ -754,18 +696,15 @@ public final class plasmaWordIndex implements indexRI {
final indexContainer c = new indexContainer( final indexContainer c = new indexContainer(
wordHash, wordHash,
indexRWIRowEntry.urlEntryRow, indexRWIRowEntry.urlEntryRow,
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash) dhtCache.sizeContainer(wordHash));
); c.addAllUnique(dhtCache.deleteContainer(wordHash));
c.addAllUnique(dhtInCache.deleteContainer(wordHash));
c.addAllUnique(dhtOutCache.deleteContainer(wordHash));
c.addAllUnique(collections.deleteContainer(wordHash)); c.addAllUnique(collections.deleteContainer(wordHash));
return c; return c;
} }
public boolean removeEntry(final String wordHash, final String urlHash) { public boolean removeEntry(final String wordHash, final String urlHash) {
boolean removed = false; boolean removed = false;
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash)); removed = removed | (dhtCache.removeEntry(wordHash, urlHash));
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash));
removed = removed | (collections.removeEntry(wordHash, urlHash)); removed = removed | (collections.removeEntry(wordHash, urlHash));
return removed; return removed;
} }
@ -783,16 +722,14 @@ public final class plasmaWordIndex implements indexRI {
public int removeEntries(final String wordHash, final Set<String> urlHashes) { public int removeEntries(final String wordHash, final Set<String> urlHashes) {
int removed = 0; int removed = 0;
removed += dhtInCache.removeEntries(wordHash, urlHashes); removed += dhtCache.removeEntries(wordHash, urlHashes);
removed += dhtOutCache.removeEntries(wordHash, urlHashes);
removed += collections.removeEntries(wordHash, urlHashes); removed += collections.removeEntries(wordHash, urlHashes);
return removed; return removed;
} }
public String removeEntriesExpl(final String wordHash, final Set<String> urlHashes) { public String removeEntriesExpl(final String wordHash, final Set<String> urlHashes) {
String removed = ""; String removed = "";
removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", "; removed += dhtCache.removeEntries(wordHash, urlHashes) + ", ";
removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", ";
removed += collections.removeEntries(wordHash, urlHashes); removed += collections.removeEntries(wordHash, urlHashes);
return removed; return removed;
} }
@ -825,7 +762,7 @@ public final class plasmaWordIndex implements indexRI {
containerOrder.rotate(emptyContainer(startHash, 0)); containerOrder.rotate(emptyContainer(startHash, 0));
final TreeSet<indexContainer> containers = new TreeSet<indexContainer>(containerOrder); final TreeSet<indexContainer> containers = new TreeSet<indexContainer>(containerOrder);
final Iterator<indexContainer> i = wordContainers(startHash, ram, rot); final Iterator<indexContainer> i = wordContainers(startHash, ram, rot);
if (ram) count = Math.min(dhtOutCache.size(), count); if (ram) count = Math.min(dhtCache.size(), count);
indexContainer container; indexContainer container;
// this loop does not terminate using the i.hasNex() predicate when rot == true // this loop does not terminate using the i.hasNex() predicate when rot == true
// because then the underlying iterator is a rotating iterator without termination // because then the underlying iterator is a rotating iterator without termination
@ -958,7 +895,7 @@ public final class plasmaWordIndex implements indexRI {
public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startHash, final boolean ram, final boolean rot) { public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startHash, final boolean ram, final boolean rot) {
final kelondroCloneableIterator<indexContainer> i = wordContainers(startHash, ram); final kelondroCloneableIterator<indexContainer> i = wordContainers(startHash, ram);
if (rot) { if (rot) {
return new kelondroRotateIterator<indexContainer>(i, new String(kelondroBase64Order.zero(startHash.length())), dhtOutCache.size() + ((ram) ? 0 : collections.size())); return new kelondroRotateIterator<indexContainer>(i, new String(kelondroBase64Order.zero(startHash.length())), dhtCache.size() + ((ram) ? 0 : collections.size()));
} }
return i; return i;
} }
@ -967,10 +904,10 @@ public final class plasmaWordIndex implements indexRI {
final kelondroOrder<indexContainer> containerOrder = new indexContainerOrder(indexOrder.clone()); final kelondroOrder<indexContainer> containerOrder = new indexContainerOrder(indexOrder.clone());
containerOrder.rotate(emptyContainer(startWordHash, 0)); containerOrder.rotate(emptyContainer(startWordHash, 0));
if (ram) { if (ram) {
return dhtOutCache.wordContainers(startWordHash, false); return dhtCache.wordContainers(startWordHash, false);
} }
return new kelondroMergeIterator<indexContainer>( return new kelondroMergeIterator<indexContainer>(
dhtOutCache.wordContainers(startWordHash, false), dhtCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false), collections.wordContainers(startWordHash, false),
containerOrder, containerOrder,
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,

@ -639,7 +639,7 @@ public final class yacyClient {
// insert the containers to the index // insert the containers to the index
for (int m = 0; m < words; m++) { for (int m = 0; m < words; m++) {
wordIndex.addEntries(container[m], true); wordIndex.addEntries(container[m]);
} }
// generate statistics // generate statistics

Loading…
Cancel
Save