From b5e40e2fa23bda6670e807d85c72064d99854694 Mon Sep 17 00:00:00 2001 From: karlchenofhell Date: Tue, 17 Oct 2006 21:01:35 +0000 Subject: [PATCH] - fix for http://www.yacy-forum.de/viewtopic.php?t=2974 (no cache-sizes for new db) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2792 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceMemory_p.html | 12 ++--- htroot/PerformanceMemory_p.java | 50 ++++++++++++------- source/de/anomic/plasma/plasmaCrawlEURL.java | 4 ++ .../de/anomic/plasma/plasmaCrawlStacker.java | 10 +++- source/de/anomic/plasma/plasmaWordIndex.java | 8 +-- 5 files changed, 55 insertions(+), 29 deletions(-) diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index 79f5f7222..8824727a0 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -96,7 +96,7 @@ Default Max Best Max - + #(useRWICache)#:: RWI Assortment Cluster #[slreqRWI]# @@ -116,7 +116,7 @@ The Assortment Cluster stores most of the page indexes. Flushing speed of the temporary RWI cache depends on the size of this file cache. Increasing the space of this cache will speed up crawls with a depth > 3. - + #(/useRWICache)# HTTP Response Header @@ -158,7 +158,7 @@ This is the database that holds the hash/url - relation and properties regarding the url like load date and server date. This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer. - + #(usePreNURLCache)#:: 'pre-noticed' URLs #[slreqPreNURL]# @@ -176,7 +176,7 @@ #[dfltPreNURL]# #[bestPreNURL]# - + #(/usePreNURLCache)# 'noticed' URLs @@ -197,7 +197,7 @@ A noticed URL is one that was discovered during crawling but was not loaded yet. Increasing the cache size will result in faster double-check during URL recognition when doing crawls. - + #(useEURLCache)#:: 'error' URLs #[slreqEURL]# @@ -216,7 +216,7 @@ #[bestEURL]# URLs that cannot be loaded are stored in this database. It is also used for double-checked during crawling. Increasing the cache size will most probably speed up crawling slightly, but not significantly. - + #(/useEURLCache)# DHT Control diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 45d2b895b..3b7a98370 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -166,13 +166,17 @@ public class PerformanceMemory_p { currTotal = 0; dfltTotal = 0; bestTotal = 0; - - req = sb.wordIndex.size(); - chk = sb.wordIndex.assortmentsCacheChunkSizeAvg(); - obj = sb.wordIndex.assortmentsCacheObjectSizeAvg(); - slt = sb.wordIndex.assortmentsCacheNodeStatus(); - ost = sb.wordIndex.assortmentsCacheObjectStatus(); - putprop(prop, env, "RWI", set); + + if (sb.wordIndex.useCollectionIndex) { + prop.put("useRWICache", 0); + } else { + req = sb.wordIndex.size(); + chk = sb.wordIndex.assortmentsCacheChunkSizeAvg(); + obj = sb.wordIndex.assortmentsCacheObjectSizeAvg(); + slt = sb.wordIndex.assortmentsCacheNodeStatus(); + ost = sb.wordIndex.assortmentsCacheObjectStatus(); + putprop(prop, env, "RWI", set); + } req = sb.cacheManager.dbSize(); chk = sb.cacheManager.cacheNodeChunkSize(); @@ -188,12 +192,16 @@ public class PerformanceMemory_p { ost = sb.urlPool.loadedURL.cacheObjectStatus(); putprop(prop, env, "LURL", set); - req = sb.sbStackCrawlThread.size(); - chk = sb.sbStackCrawlThread.cacheNodeChunkSize(); - obj = sb.sbStackCrawlThread.cacheObjectChunkSize(); - slt = sb.sbStackCrawlThread.cacheNodeStatus(); - ost = sb.sbStackCrawlThread.cacheObjectStatus(); - putprop(prop, env, "PreNURL", set); + if (sb.sbStackCrawlThread.getDBType() != de.anomic.plasma.plasmaCrawlStacker.QUEUE_DB_TYPE_TREE) { + prop.put("usePreNURLCache", 0); + } else { + req = sb.sbStackCrawlThread.size(); + chk = sb.sbStackCrawlThread.cacheNodeChunkSize(); + obj = sb.sbStackCrawlThread.cacheObjectChunkSize(); + slt = sb.sbStackCrawlThread.cacheNodeStatus(); + ost = sb.sbStackCrawlThread.cacheObjectStatus(); + putprop(prop, env, "PreNURL", set); + } req = sb.urlPool.noticeURL.size(); chk = sb.urlPool.noticeURL.cacheNodeChunkSize(); @@ -202,12 +210,16 @@ public class PerformanceMemory_p { ost = sb.urlPool.noticeURL.cacheObjectStatus(); putprop(prop, env, "NURL", set); - req = sb.urlPool.errorURL.size(); - chk = sb.urlPool.errorURL.cacheNodeChunkSize(); - obj = sb.urlPool.errorURL.cacheObjectChunkSize(); - slt = sb.urlPool.errorURL.cacheNodeStatus(); - ost = sb.urlPool.errorURL.cacheObjectStatus(); - putprop(prop, env, "EURL", set); + if (sb.urlPool.errorURL.getUseNewDB()) { + prop.put("useEURLCache", 0); + } else { + req = sb.urlPool.errorURL.size(); + chk = sb.urlPool.errorURL.cacheNodeChunkSize(); + obj = sb.urlPool.errorURL.cacheObjectChunkSize(); + slt = sb.urlPool.errorURL.cacheNodeStatus(); + ost = sb.urlPool.errorURL.cacheObjectStatus(); + putprop(prop, env, "EURL", set); + } req = yacyCore.seedDB.sizeConnected() + yacyCore.seedDB.sizeDisconnected() + yacyCore.seedDB.sizePotential(); chk = yacyCore.seedDB.cacheNodeChunkSize(); diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index f53dd0f55..b41ceefce 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -184,6 +184,10 @@ public class plasmaCrawlEURL extends indexURL { public synchronized Entry getEntry(String hash) throws IOException { return new Entry(hash); } + + public boolean getUseNewDB() { + return (urlIndexFile instanceof kelondroFlexTable); + } public boolean exists(String urlHash) { try { diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 5fd51eec6..c1230ed69 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -127,6 +127,10 @@ public final class plasmaCrawlStacker { return this.theWorkerPoolConfig; } + public int getDBType() { + return this.queue.getDBType(); + } + public void setPoolConfig(GenericObjectPool.Config newConfig) { this.theWorkerPool.setConfig(newConfig); } @@ -174,7 +178,7 @@ public final class plasmaCrawlStacker { public long[] cacheObjectStatus() { return this.queue.cacheObjectStatus(); - } + } public void job() { try { @@ -747,6 +751,10 @@ public final class plasmaCrawlStacker { } } + public int getDBType() { + return this.dbtype; + } + public stackCrawlMessage waitForMessage() throws InterruptedException, IOException { this.readSync.P(); this.writeSync.P(); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 583709256..b7819e9ca 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -139,7 +139,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } public int[] assortmentsSizes() { - return (assortmentCluster == null) ? null : assortmentCluster.sizes(); + return (assortmentCluster == null) ? new int[assortmentCount] : assortmentCluster.sizes(); } public int assortmentsCacheChunkSizeAvg() { @@ -151,11 +151,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } public int[] assortmentsCacheNodeStatus() { - return (assortmentCluster == null) ? null : assortmentCluster.cacheNodeStatus(); + if (assortmentCluster != null) return assortmentCluster.cacheNodeStatus(); + return new int[]{0,0,0,0,0,0,0,0,0,0}; } public long[] assortmentsCacheObjectStatus() { - return (assortmentCluster == null) ? null : assortmentCluster.cacheObjectStatus(); + if (assortmentCluster != null) return assortmentCluster.cacheObjectStatus(); + return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; } public void setMaxWordCount(int maxWords) {