From dedfc7df7f5e2be64deeed474e769e8fe9016b29 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 22 Jan 2009 00:03:54 +0000 Subject: [PATCH] removed distinction between DHT-in and DHT-out. This is necessary to make room for the new cell data structure, which cannot use this distinction in the first place, but will enable the same meaning with different mechanisms (segments, later) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5511 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceQueues_p.html | 23 +-- htroot/PerformanceQueues_p.java | 15 +- htroot/PerformanceQueues_p.xml | 28 +--- htroot/WatchCrawler_p.java | 21 ++- htroot/api/queues_p.java | 3 - htroot/api/status_p.java | 9 +- htroot/api/status_p.xml | 6 +- htroot/js/WatchCrawler.js | 4 +- htroot/yacy/transferRWI.java | 8 +- source/de/anomic/index/indexRAMRI.java | 29 +--- .../kelondro/kelondroBufferedEcoFS.java | 2 +- source/de/anomic/plasma/plasmaWordIndex.java | 157 ++++++------------ source/de/anomic/yacy/yacyClient.java | 2 +- 13 files changed, 109 insertions(+), 198 deletions(-) mode change 100644 => 100755 htroot/api/queues_p.java diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index c34ff8eea..1f2cfcd57 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -68,14 +68,13 @@
Cache Settings: - - - + + - + - - + - - + - - + - - + - -
Cache TypeDHT-OutDHT-InRAM Cache Description
URLs in RAM buffer:#[urlCacheSize]##[urlCacheSize]# This is the size of the URL write buffer. Its purpose is to buffer incoming URLs in case of search result transmission and during DHT transfer. @@ -83,8 +82,7 @@
Words in RAM cache:
(Size in KBytes)
#[wordCacheWSize]#
(#[wordCacheWSizeKBytes]# KB)
#[wordCacheKSize]#
(#[wordCacheKSizeKBytes]# KB)
#[wordCacheSize]#
(#[wordCacheSizeKBytes]# KB)
This is the current size of the word caches. The indexing cache speeds up the indexing process, the DHT cache holds indexes temporary for approval. @@ -93,8 +91,7 @@
Maximum URLs currently assigned
to one cached word:
#[maxURLinWCache]##[maxURLinKCache]##[maxURLinCache]# This is the maximum size of URLs assigned to a single word cache entry. If this is a big number, it shows that the caching works efficiently. @@ -102,23 +99,21 @@
Maximum age of a word:#[maxAgeOfWCache]##[maxAgeOfKCache]##[maxAgeOfCache]# This is the maximum age of a word in an index in minutes.
Minimum age of a word:#[minAgeOfWCache]##[minAgeOfKCache]##[minAgeOfCache]# This is the minimum age of a word in an index in minutes.
Maximum number of words in cache: + @@ -129,7 +124,7 @@
Initial space of words in cache: + diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index 762133131..76b1a16da 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -286,16 +286,11 @@ public class PerformanceQueues_p { // table cache settings prop.putNum("urlCacheSize", switchboard.webIndex.getURLwriteCacheSize()); - prop.putNum("wordCacheWSize", switchboard.webIndex.dhtOutCacheSize()); - prop.putNum("wordCacheKSize", switchboard.webIndex.dhtInCacheSize()); - prop.putNum("wordCacheWSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(false)/1024); - prop.putNum("wordCacheKSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes(true)/1024); - prop.putNum("maxURLinWCache", switchboard.webIndex.maxURLinDHTOutCache()); - prop.putNum("maxURLinKCache", switchboard.webIndex.maxURLinDHTInCache()); - prop.putNum("maxAgeOfWCache", switchboard.webIndex.maxAgeOfDHTOutCache() / 1000 / 60); // minutes - prop.putNum("maxAgeOfKCache", switchboard.webIndex.maxAgeOfDHTInCache() / 1000 / 60); // minutes - prop.putNum("minAgeOfWCache", switchboard.webIndex.minAgeOfDHTOutCache() / 1000 / 60); // minutes - prop.putNum("minAgeOfKCache", switchboard.webIndex.minAgeOfDHTInCache() / 1000 / 60); // minutes + prop.putNum("wordCacheSize", switchboard.webIndex.dhtCacheSize()); + prop.putNum("wordCacheSizeKBytes", switchboard.webIndex.dhtCacheSizeBytes()/1024); + prop.putNum("maxURLinCache", switchboard.webIndex.maxURLinDHTCache()); + prop.putNum("maxAgeOfCache", switchboard.webIndex.maxAgeOfDHTCache() / 1000 / 60); // minutes + prop.putNum("minAgeOfCache", switchboard.webIndex.minAgeOfDHTCache() / 1000 / 60); // minutes prop.putNum("maxWaitingWordFlush", switchboard.getConfigLong("maxWaitingWordFlush", 180)); prop.put("wordCacheMaxCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000)); prop.put("wordCacheInitCount", switchboard.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_INIT_COUNT, 30000)); diff 
--git a/htroot/PerformanceQueues_p.xml b/htroot/PerformanceQueues_p.xml index 173a8c830..a1f95c117 100644 --- a/htroot/PerformanceQueues_p.xml +++ b/htroot/PerformanceQueues_p.xml @@ -24,26 +24,14 @@ #{/table}# - - #[urlCacheSize]# - #[wordCacheWSize]# - #[maxURLinWCache]# - #[maxAgeOfWCache]# - #[minAgeOfWCache]# - #[wordOutCacheMaxCount]# - #[wordCacheInitCount]# - #[wordFlushSize]# - - - #[urlCacheSize]# - #[wordCacheKSize]# - #[maxURLinKCache]# - #[maxAgeOfKCache]# - #[minAgeOfKCache]# - #[wordInCacheMaxCount]# - #[wordCacheInitCount]# - #[wordFlushSize]# - + #[urlCacheSize]# + #[wordCacheSize]# + #[maxURLinCache]# + #[maxAgeOfCache]# + #[minAgeOfCache]# + #[wordOutCacheMaxCount]# + #[wordCacheInitCount]# + #[wordFlushSize]# #{pool}# diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 9a7a9f67e..78576ff8c 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -36,7 +36,6 @@ import java.util.Set; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; -import api.queues_p; import de.anomic.crawler.CrawlEntry; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.SitemapImporter; @@ -68,7 +67,23 @@ public class WatchCrawler_p { // return variable that accumulates replacements final plasmaSwitchboard sb = (plasmaSwitchboard) env; // inital values for AJAX Elements (without JavaScript) - final serverObjects prop = queues_p.respond(header, post, env); + final serverObjects prop = new serverObjects(); + prop.put("rejected", 0); + prop.put("indexingSize", 0); + prop.put("indexingMax", 0); + prop.put("urlpublictextSize", 0); + prop.put("rwipublictextSize", 0); + prop.put("list", "0"); + prop.put("loaderSize", 0); + prop.put("loaderMax", 0); + prop.put("list-loader", 0); + prop.put("localCrawlSize", 0); + prop.put("localCrawlState", ""); + prop.put("limitCrawlSize", 0); + prop.put("limitCrawlState", ""); + prop.put("remoteCrawlSize", 0); + prop.put("remoteCrawlState", ""); + 
prop.put("list-remote", 0); prop.put("forwardToCrawlStart", "0"); prop.put("info", "0"); @@ -429,7 +444,7 @@ public class WatchCrawler_p { // return rewrite properties return prop; } - + private static long recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) { if (!recrawlIfOlderCheck) return 0L; if (crawlingIfOlderUnit.equals("year")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 365L; diff --git a/htroot/api/queues_p.java b/htroot/api/queues_p.java old mode 100644 new mode 100755 index daf0ebe3b..7c2f9a19f --- a/htroot/api/queues_p.java +++ b/htroot/api/queues_p.java @@ -1,6 +1,3 @@ - -package api; - import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; diff --git a/htroot/api/status_p.java b/htroot/api/status_p.java index ebe601031..8f794c5e8 100644 --- a/htroot/api/status_p.java +++ b/htroot/api/status_p.java @@ -21,15 +21,14 @@ public class status_p { prop.setLocalized(false); prop.put("rejected", "0"); sb.updateMySeed(); - final int cacheOutSize = sb.webIndex.dhtOutCacheSize(); + final int cacheSize = sb.webIndex.dhtCacheSize(); final long cacheMaxSize = sb.getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 10000); prop.putNum("ppm", sb.currentPPM()); prop.putNum("qpm", sb.webIndex.seedDB.mySeed().getQPM()); - prop.putNum("wordCacheSize", sb.webIndex.dhtOutCacheSize() + sb.webIndex.dhtInCacheSize()); - prop.putNum("wordCacheWSize", cacheOutSize); - prop.putNum("wordCacheKSize", sb.webIndex.dhtInCacheSize()); + prop.putNum("wordCacheSize", sb.webIndex.dhtCacheSize()); + prop.putNum("wordCacheSize", cacheSize); prop.putNum("wordCacheMaxSize", cacheMaxSize); - prop.put("wordCacheWCount", cacheOutSize); + prop.put("wordCacheCount", cacheSize); prop.put("wordCacheMaxCount", cacheMaxSize); // diff --git a/htroot/api/status_p.xml b/htroot/api/status_p.xml index 7dced800b..550efc2dd 100644 --- 
a/htroot/api/status_p.xml +++ b/htroot/api/status_p.xml @@ -2,10 +2,8 @@ #[ppm]# #[wordCacheSize]# - #[wordCacheWSize]# - #[wordCacheWCount]# - #[wordCacheKSize]# - #[wordCacheMaxSize]# + #[wordCacheSize]# + #[wordCacheCount]# #[wordCacheMaxCount]# diff --git a/htroot/js/WatchCrawler.js b/htroot/js/WatchCrawler.js index f4633a9af..0cca57daa 100644 --- a/htroot/js/WatchCrawler.js +++ b/htroot/js/WatchCrawler.js @@ -91,8 +91,8 @@ function handleStatus(){ removeAllChildren(trafCrawlerSpan); trafCrawlerSpan.appendChild(document.createTextNode(Math.round((trafficCrawler) / 1024 / 10.24) / 100)); - var wordCache=getValue(getFirstChild(statusTag, "wordCacheWCount")); - var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheWSize")); + var wordCache=getValue(getFirstChild(statusTag, "wordCacheCount")); + var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheSize")); var wordCacheMax=getValue(getFirstChild(statusTag, "wordCacheMaxCount")); var wordCacheMaxSize=getValue(getFirstChild(statusTag, "wordCacheMaxSize")); diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 2cd63f09f..1c4947e73 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -100,9 +100,9 @@ public final class transferRWI { sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted."); result = "not_granted"; pause = 0; - } else if (checkLimit && sb.webIndex.dhtInCacheSize() > cachelimit) { + } else if (checkLimit && sb.webIndex.dhtCacheSize() > cachelimit) { // we are too busy to receive indexes - sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.webIndex.dhtInCacheSize() + ")."); + sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". 
We are too busy (buffersize=" + sb.webIndex.dhtCacheSize() + ")."); granted = false; // don't accept more words if there are too many words to flush result = "busy"; pause = 60000; @@ -163,7 +163,7 @@ public final class transferRWI { } // learn entry - sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true); + sb.webIndex.addEntry(wordHash, iEntry, System.currentTimeMillis()); serverCore.checkInterruption(); // check if we need to ask for the corresponding URL @@ -200,7 +200,7 @@ public final class transferRWI { result = "ok"; if (checkLimit) { - pause = (sb.webIndex.dhtInCacheSize() < 500) ? 0 : sb.webIndex.dhtInCacheSize(); // estimation of necessary pause time + pause = (sb.webIndex.dhtCacheSize() < 500) ? 0 : sb.webIndex.dhtCacheSize(); // estimation of necessary pause time } } diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java index 12c439e16..a434c733f 100644 --- a/source/de/anomic/index/indexRAMRI.java +++ b/source/de/anomic/index/indexRAMRI.java @@ -48,7 +48,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable) heap.wordContainers(null, false)) { this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote())); @@ -327,7 +315,7 @@ public final class indexRAMRI implements indexRI, indexRIReader, Iterable iterator() { + return wordContainers(null, false); } } diff --git a/source/de/anomic/kelondro/kelondroBufferedEcoFS.java b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java index 0482cb670..5f27f49e4 100644 --- a/source/de/anomic/kelondro/kelondroBufferedEcoFS.java +++ b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java @@ -60,7 +60,7 @@ public class kelondroBufferedEcoFS { } public synchronized long size() throws IOException { - return efs.size(); + return efs == null ? 
0 : efs.size(); } public File filename() { diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index d2e94498c..70b5b3180 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -69,7 +69,6 @@ import de.anomic.xml.RSSFeed; import de.anomic.xml.RSSMessage; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyPeerActions; -import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacyURL; @@ -99,7 +98,7 @@ public final class plasmaWordIndex implements indexRI { private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder; - private final indexRAMRI dhtOutCache, dhtInCache; + private final indexRAMRI dhtCache; private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster private final serverLog log; private indexRepositoryReference referenceURL; @@ -141,23 +140,21 @@ public final class plasmaWordIndex implements indexRI { } } } - /* - * + final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE"); if (!(textindexcache.exists())) textindexcache.mkdirs(); - if (new File(textindexcache, "index.dhtout.blob").exists()) { - this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log); - indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log); - indexContainer c1; + if (new File(textindexcache, "index.dhtin.blob").exists()) { + // migration of the both caches into one + this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); + indexRAMRI dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, 
entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log); for (indexContainer c: dhtInCache) { - this.dhtCache.addEntries(c); + this.dhtCache.addEntries(c); } + new File(textindexcache, "index.dhtin.blob").delete(); + } else { + // read in new BLOB + this.dhtCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); } - */ - final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE"); - if (!(textindexcache.exists())) textindexcache.mkdirs(); - this.dhtOutCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.heap", "index.dhtout.blob", log); - this.dhtInCache = new indexRAMRI(textindexcache, indexRWIRowEntry.urlEntryRow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.heap", "index.dhtin.blob", log); // create collections storage path final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION"); @@ -247,8 +244,7 @@ public final class plasmaWordIndex implements indexRI { } public void clear() { - dhtInCache.clear(); - dhtOutCache.clear(); + dhtCache.clear(); collections.clear(); try { referenceURL.clear(); @@ -421,46 +417,30 @@ public final class plasmaWordIndex implements indexRI { } public int minMem() { - return 1024*1024 /* indexing overhead */ + dhtOutCache.minMem() + dhtInCache.minMem() + collections.minMem(); + return 1024*1024 /* indexing overhead */ + dhtCache.minMem() + collections.minMem(); } - public int maxURLinDHTOutCache() { - return dhtOutCache.maxURLinCache(); + public int maxURLinDHTCache() { + return dhtCache.maxURLinCache(); } - public long minAgeOfDHTOutCache() { - return dhtOutCache.minAgeOfCache(); + public long minAgeOfDHTCache() { + return dhtCache.minAgeOfCache(); } - public long maxAgeOfDHTOutCache() { - return dhtOutCache.maxAgeOfCache(); + public long maxAgeOfDHTCache() { + return 
dhtCache.maxAgeOfCache(); } - public int maxURLinDHTInCache() { - return dhtInCache.maxURLinCache(); - } - - public long minAgeOfDHTInCache() { - return dhtInCache.minAgeOfCache(); - } - - public long maxAgeOfDHTInCache() { - return dhtInCache.maxAgeOfCache(); - } - - public int dhtOutCacheSize() { - return dhtOutCache.size(); - } - - public int dhtInCacheSize() { - return dhtInCache.size(); + public int dhtCacheSize() { + return dhtCache.size(); } - public long dhtCacheSizeBytes(final boolean in) { + public long dhtCacheSizeBytes() { // calculate the real size in bytes of DHT-In/Out-Cache long cacheBytes = 0; final long entryBytes = indexRWIRowEntry.urlEntryRow.objectsize; - final indexRAMRI cache = (in ? dhtInCache : dhtOutCache); + final indexRAMRI cache = (dhtCache); synchronized (cache) { final Iterator it = cache.wordContainers(null, false); while (it.hasNext()) cacheBytes += it.next().size() * entryBytes; @@ -469,8 +449,7 @@ public final class plasmaWordIndex implements indexRI { } public void setMaxWordCount(final int maxWords) { - dhtOutCache.setMaxWordCount(maxWords); - dhtInCache.setMaxWordCount(maxWords); + dhtCache.setMaxWordCount(maxWords); } public void dhtFlushControl(final indexRAMRI theCache) { @@ -500,38 +479,18 @@ public final class plasmaWordIndex implements indexRI { return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount); } - public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime, boolean dhtInCase) { - // set dhtInCase depending on wordHash - if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, wordHash, this.netRedundancy))) dhtInCase = true; - + public void addEntry(final String wordHash, final indexRWIRowEntry entry, final long updateTime) { // add the entry - if (dhtInCase) { - dhtInCache.addEntry(wordHash, entry, updateTime, true); - dhtFlushControl(this.dhtInCache); - } else { - dhtOutCache.addEntry(wordHash, entry, updateTime, false); - dhtFlushControl(this.dhtOutCache); 
- } + dhtCache.addEntry(wordHash, entry, updateTime, true); + dhtFlushControl(this.dhtCache); } public void addEntries(final indexContainer entries) { - addEntries(entries, false); - } - - public void addEntries(final indexContainer entries, boolean dhtInCase) { assert (entries.row().objectsize == indexRWIRowEntry.urlEntryRow.objectsize); - - // set dhtInCase depending on wordHash - if ((!dhtInCase) && (yacySeed.shallBeOwnWord(seedDB, entries.getWordHash(), this.netRedundancy))) dhtInCase = true; - + // add the entry - if (dhtInCase) { - dhtInCache.addEntries(entries); - dhtFlushControl(this.dhtInCache); - } else { - dhtOutCache.addEntries(entries); - dhtFlushControl(this.dhtOutCache); - } + dhtCache.addEntries(entries); + dhtFlushControl(this.dhtCache); } public void flushCacheFor(int time) { @@ -539,19 +498,9 @@ public final class plasmaWordIndex implements indexRI { } private synchronized void flushCacheUntil(long timeout) { - long timeout0 = System.currentTimeMillis() + (timeout - System.currentTimeMillis()) / 10 * 6; - // we give 60% for dhtIn to prefer filling of cache with dht transmission - //int cIn = 0; - while (System.currentTimeMillis() < timeout0 && dhtInCache.size() > 0) { - flushCacheOne(dhtInCache); - //cIn++; + while (System.currentTimeMillis() < timeout && dhtCache.size() > 0) { + flushCacheOne(dhtCache); } - //int cOut = 0; - while (System.currentTimeMillis() < timeout && dhtOutCache.size() > 0) { - flushCacheOne(dhtOutCache); - //cOut++; - } - //System.out.println("*** DEBUG cache flush: cIn = " + cIn + ", cOut = " + cOut); } private synchronized void flushCacheOne(final indexRAMRI ram) { @@ -615,7 +564,7 @@ public final class plasmaWordIndex implements indexRI { doctype, outlinksSame, outlinksOther, wprop.flags); - addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis(), false); + addEntry(indexWord.word2hash(word), ientry, System.currentTimeMillis()); wordCount++; } @@ -623,8 +572,7 @@ public final class plasmaWordIndex 
implements indexRI { } public boolean hasContainer(final String wordHash) { - if (dhtOutCache.hasContainer(wordHash)) return true; - if (dhtInCache.hasContainer(wordHash)) return true; + if (dhtCache.hasContainer(wordHash)) return true; if (collections.hasContainer(wordHash)) return true; return false; } @@ -637,12 +585,7 @@ public final class plasmaWordIndex implements indexRI { // get from cache indexContainer container; - container = dhtOutCache.getContainer(wordHash, urlselection); - if (container == null) { - container = dhtInCache.getContainer(wordHash, urlselection); - } else { - container.addAllUnique(dhtInCache.getContainer(wordHash, urlselection)); - } + container = dhtCache.getContainer(wordHash, urlselection); // get from collection index if (container == null) { @@ -727,7 +670,7 @@ public final class plasmaWordIndex implements indexRI { } public int size() { - return java.lang.Math.max(collections.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size())); + return java.lang.Math.max(collections.size(), dhtCache.size()); } public int collectionsSize() { @@ -735,12 +678,11 @@ public final class plasmaWordIndex implements indexRI { } public int cacheSize() { - return dhtInCache.size() + dhtOutCache.size(); + return dhtCache.size(); } public void close() { - dhtInCache.close(); - dhtOutCache.close(); + dhtCache.close(); collections.close(); referenceURL.close(); seedDB.close(); @@ -754,18 +696,15 @@ public final class plasmaWordIndex implements indexRI { final indexContainer c = new indexContainer( wordHash, indexRWIRowEntry.urlEntryRow, - dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash) - ); - c.addAllUnique(dhtInCache.deleteContainer(wordHash)); - c.addAllUnique(dhtOutCache.deleteContainer(wordHash)); + dhtCache.sizeContainer(wordHash)); + c.addAllUnique(dhtCache.deleteContainer(wordHash)); c.addAllUnique(collections.deleteContainer(wordHash)); return c; } public boolean removeEntry(final String wordHash, final String 
urlHash) { boolean removed = false; - removed = removed | (dhtInCache.removeEntry(wordHash, urlHash)); - removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash)); + removed = removed | (dhtCache.removeEntry(wordHash, urlHash)); removed = removed | (collections.removeEntry(wordHash, urlHash)); return removed; } @@ -783,16 +722,14 @@ public final class plasmaWordIndex implements indexRI { public int removeEntries(final String wordHash, final Set urlHashes) { int removed = 0; - removed += dhtInCache.removeEntries(wordHash, urlHashes); - removed += dhtOutCache.removeEntries(wordHash, urlHashes); + removed += dhtCache.removeEntries(wordHash, urlHashes); removed += collections.removeEntries(wordHash, urlHashes); return removed; } public String removeEntriesExpl(final String wordHash, final Set urlHashes) { String removed = ""; - removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", "; - removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", "; + removed += dhtCache.removeEntries(wordHash, urlHashes) + ", "; removed += collections.removeEntries(wordHash, urlHashes); return removed; } @@ -825,7 +762,7 @@ public final class plasmaWordIndex implements indexRI { containerOrder.rotate(emptyContainer(startHash, 0)); final TreeSet containers = new TreeSet(containerOrder); final Iterator i = wordContainers(startHash, ram, rot); - if (ram) count = Math.min(dhtOutCache.size(), count); + if (ram) count = Math.min(dhtCache.size(), count); indexContainer container; // this loop does not terminate using the i.hasNex() predicate when rot == true // because then the underlying iterator is a rotating iterator without termination @@ -958,7 +895,7 @@ public final class plasmaWordIndex implements indexRI { public synchronized kelondroCloneableIterator wordContainers(final String startHash, final boolean ram, final boolean rot) { final kelondroCloneableIterator i = wordContainers(startHash, ram); if (rot) { - return new kelondroRotateIterator(i, new 
String(kelondroBase64Order.zero(startHash.length())), dhtOutCache.size() + ((ram) ? 0 : collections.size())); + return new kelondroRotateIterator(i, new String(kelondroBase64Order.zero(startHash.length())), dhtCache.size() + ((ram) ? 0 : collections.size())); } return i; } @@ -967,10 +904,10 @@ public final class plasmaWordIndex implements indexRI { final kelondroOrder containerOrder = new indexContainerOrder(indexOrder.clone()); containerOrder.rotate(emptyContainer(startWordHash, 0)); if (ram) { - return dhtOutCache.wordContainers(startWordHash, false); + return dhtCache.wordContainers(startWordHash, false); } return new kelondroMergeIterator( - dhtOutCache.wordContainers(startWordHash, false), + dhtCache.wordContainers(startWordHash, false), collections.wordContainers(startWordHash, false), containerOrder, indexContainer.containerMergeMethod, diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 870756fc5..6f634f36a 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -639,7 +639,7 @@ public final class yacyClient { // insert the containers to the index for (int m = 0; m < words; m++) { - wordIndex.addEntries(container[m], true); + wordIndex.addEntries(container[m]); } // generate statistics