From 66964dc0155890855dbeb017acd3e2995efc6d14 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 25 Jun 2006 10:31:38 +0000 Subject: [PATCH] removed high/med/low from kelondroRecords cache control. this was done because testing showed that cache-delete operations slowed down record access most, even more than actual IO operations. Cache-delete operations appeared when entries were shifted from low-priority positions to high-priority positions. During a fill of x entries to a database, x/2 delete situations happened, which caused two or more delete operations. removing the cache control means that these delete operations are not necessary any more, but it is more difficult to decide which cache elements shall be removed in case that the cache is full. There is not yet a stable solution for this case, but the advantage of a faster cache is more important than the flush problem. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2244 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/PerformanceMemory_p.html | 67 +++---- htroot/PerformanceMemory_p.java | 25 +-- source/de/anomic/data/blogBoard.java | 2 +- source/de/anomic/data/messageBoard.java | 2 +- source/de/anomic/data/userDB.java | 2 +- source/de/anomic/data/wikiBoard.java | 12 +- source/de/anomic/index/indexURL.java | 2 +- .../anomic/kelondro/kelondroIntBytesMap.java | 2 +- source/de/anomic/kelondro/kelondroMap.java | 2 +- .../de/anomic/kelondro/kelondroRecords.java | 186 +++--------------- source/de/anomic/kelondro/kelondroStack.java | 2 +- source/de/anomic/plasma/plasmaCrawlLURL.java | 2 +- .../de/anomic/plasma/plasmaCrawlProfile.java | 2 +- .../anomic/plasma/plasmaCrawlRobotsTxt.java | 2 +- source/de/anomic/plasma/plasmaHTCache.java | 2 +- .../de/anomic/plasma/plasmaSwitchboard.java | 2 +- source/de/anomic/plasma/plasmaWordIndex.java | 2 +- .../plasma/plasmaWordIndexAssortment.java | 2 +- .../plasmaWordIndexAssortmentCluster.java | 15 +- source/de/anomic/yacy/yacyNewsDB.java | 2 +- 
source/de/anomic/yacy/yacyNewsPool.java | 2 +- source/de/anomic/yacy/yacySeedDB.java | 15 +- 23 files changed, 95 insertions(+), 259 deletions(-) diff --git a/build.properties b/build.properties index b8c9f8248..80097225f 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.454 +releaseVersion=0.455 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index f0ef1111f..115eaeeb4 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -83,14 +83,14 @@ DB Size
  Node Cache Object Cache -Memory Occupation (KBytes) +Memory Occupation (KBytes) Description
  -Chunk Size
high/med/low
(bytes) +Chunk
Size
(bytes) Empty
(avail.)
Slots -Used: High, Medium, Low Prio +Used
(filled)
Slots Node-Cache
Hit:Miss
Uniq:Doub
Del:Flush
 
Max
Size  
Hit-Size
Miss-Size @@ -99,16 +99,15 @@ Used Now Assigned Max Default Max -Good Max Best Max RWI Assortment Cluster #[slreqRWI]# -#[chunkRWI]# +#[chunkRWI]# #[slempRWI]# -#[slhigRWI]#
#[slmedRWI]#
#[sllowRWI]# +#[slfilRWI]# #[slhittmissRWI]#
#[sluniqdoubRWI]#
#[slflushRWI]# #[ochunkmaxRWI]# #[ochunkcurRWI]# @@ -117,7 +116,6 @@ #[usedRWI]# #[dfltRWI]# -#[goodRWI]# #[bestRWI]# The Assortment Cluster stores most of the page indexes. Flushing speed of the temporary RWI cache depends on the size of this file cache. Increasing the space of this @@ -127,9 +125,9 @@ cache will speed up crawls with a depth > 3. HTTP Response Header #[slreqHTTP]# -#[chunkHTTP]# +#[chunkHTTP]# #[slempHTTP]# -#[slhigHTTP]#
#[slmedHTTP]#
#[sllowHTTP]# +#[slfilHTTP]# #[slhittmissHTTP]#
#[sluniqdoubHTTP]#
#[slflushHTTP]# #[ochunkmaxHTTP]# #[ochunkcurHTTP]# @@ -138,7 +136,6 @@ cache will speed up crawls with a depth > 3. #[usedHTTP]# #[dfltHTTP]# -#[goodHTTP]# #[bestHTTP]# The Response Header database stores the HTTP heades that other servers send when YaCy retrieves web pages during proxy mode, when performing crawls or if it fetches pages for snippet generation. @@ -148,9 +145,9 @@ Increasing this cache will be most important for a fast proxy mode. 'loaded' URLs #[slreqLURL]# -#[chunkLURL]# +#[chunkLURL]# #[slempLURL]# -#[slhigLURL]#
#[slmedLURL]#
#[sllowLURL]# +#[slfilLURL]# #[slhittmissLURL]#
#[sluniqdoubLURL]#
#[slflushLURL]# #[ochunkmaxLURL]# #[ochunkcurLURL]# @@ -159,7 +156,6 @@ Increasing this cache will be most important for a fast proxy mode. #[usedLURL]# #[dfltLURL]# -#[goodLURL]# #[bestLURL]# This is the database that holds the hash/url - relation and properties regarding the url like load date and server date. This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer. @@ -168,9 +164,9 @@ This cache is very important for a fast search process. Increasing the cache siz 'noticed' URLs #[slreqNURL]# -#[chunkNURL]# +#[chunkNURL]# #[slempNURL]# -#[slhigNURL]#
#[slmedNURL]#
#[sllowNURL]# +#[slfilNURL]# #[slhittmissNURL]#
#[sluniqdoubNURL]#
#[slflushNURL]# #[ochunkmaxNURL]# #[ochunkcurNURL]# @@ -179,7 +175,6 @@ This cache is very important for a fast search process. Increasing the cache siz #[usedNURL]# #[dfltNURL]# -#[goodNURL]# #[bestNURL]# A noticed URL is one that was discovered during crawling but was not loaded yet. Increasing the cache size will result in faster double-check during URL recognition when doing crawls. @@ -188,9 +183,9 @@ Increasing the cache size will result in faster double-check during URL recognit 'error' URLs #[slreqEURL]# -#[chunkEURL]# +#[chunkEURL]# #[slempEURL]# -#[slhigEURL]#
#[slmedEURL]#
#[sllowEURL]# +#[slfilEURL]# #[slhittmissEURL]#
#[sluniqdoubEURL]#
#[slflushEURL]# #[ochunkmaxEURL]# #[ochunkcurEURL]# @@ -199,7 +194,6 @@ Increasing the cache size will result in faster double-check during URL recognit #[usedEURL]# #[dfltEURL]# -#[goodEURL]# #[bestEURL]# URLs that cannot be loaded are stored in this database. It is also used for double-checked during crawling. Increasing the cache size will most probably speed up crawling slightly, but not significantly. @@ -208,9 +202,9 @@ Increasing the cache size will most probably speed up crawling slightly, but not DHT Control #[slreqDHT]# -#[chunkDHT]# +#[chunkDHT]# #[slempDHT]# -#[slhigDHT]#
#[slmedDHT]#
#[sllowDHT]# +#[slfilDHT]# #[slhittmissDHT]#
#[sluniqdoubDHT]#
#[slflushDHT]# #[ochunkmaxDHT]# #[ochunkcurDHT]# @@ -219,7 +213,6 @@ Increasing the cache size will most probably speed up crawling slightly, but not #[usedDHT]# #[dfltDHT]# -#[goodDHT]# #[bestDHT]# This is simply the cache for the seed-dbs (active, passive, potential). This cache is divided into three equal parts. @@ -229,9 +222,9 @@ Increasing this cache may speed up many functions, but we need to test this to s Messages #[slreqMessage]# -#[chunkMessage]# +#[chunkMessage]# #[slempMessage]# -#[slhigMessage]#
#[slmedMessage]#
#[sllowMessage]# +#[slfilMessage]# #[slhittmissMessage]#
#[sluniqdoubMessage]#
#[slflushMessage]# #[ochunkmaxMessage]# #[ochunkcurMessage]# @@ -240,7 +233,6 @@ Increasing this cache may speed up many functions, but we need to test this to s #[usedMessage]# #[dfltMessage]# -#[goodMessage]# #[bestMessage]# The Message cache for peer-to-peer messages. Less important. @@ -248,9 +240,9 @@ Increasing this cache may speed up many functions, but we need to test this to s Wiki #[slreqWiki]# -#[chunkWiki]# +#[chunkWiki]# #[slempWiki]# -#[slhigWiki]#
#[slmedWiki]#
#[sllowWiki]# +#[slfilWiki]# #[slhittmissWiki]#
#[sluniqdoubWiki]#
#[slflushWiki]# #[ochunkmaxWiki]# #[ochunkcurWiki]# @@ -259,7 +251,6 @@ Increasing this cache may speed up many functions, but we need to test this to s #[usedWiki]# #[dfltWiki]# -#[goodWiki]# #[bestWiki]# The YaCy-Wiki uses a database to store its pages. This cache is divided in two parts, one for the wiki database and one for its backup. @@ -269,9 +260,9 @@ Increasing this cache may speed up access to the wiki pages. Blog #[slreqBlog]# -#[chunkBlog]# +#[chunkBlog]# #[slempBlog]# -#[slhigBlog]#
#[slmedBlog]#
#[sllowBlog]# +#[slfilBlog]# #[slhittmissBlog]#
#[sluniqdoubBlog]#
#[slflushBlog]# #[ochunkmaxBlog]# #[ochunkcurBlog]# @@ -280,7 +271,6 @@ Increasing this cache may speed up access to the wiki pages. #[usedBlog]# #[dfltBlog]# -#[goodBlog]# #[bestBlog]# The YaCy-Blog uses a database to store its entries. Increasing this cache may speed up access to the Blog. @@ -289,9 +279,9 @@ Increasing this cache may speed up access to the Blog. News #[slreqNews]# -#[chunkNews]# +#[chunkNews]# #[slempNews]# -#[slhigNews]#
#[slmedNews]#
#[sllowNews]# +#[slfilNews]# #[slhittmissNews]#
#[sluniqdoubNews]#
#[slflushNews]# #[ochunkmaxNews]# #[ochunkcurNews]# @@ -300,7 +290,6 @@ Increasing this cache may speed up access to the Blog. #[usedNews]# #[dfltNews]# -#[goodNews]# #[bestNews]# The News-DB stores property-lists for news that are included in seeds. Increasing this cache may speed up the peer-ping. @@ -309,9 +298,9 @@ Increasing this cache may speed up the peer-ping. robots.txt DB #[slreqRobots]# -#[chunkRobots]# +#[chunkRobots]# #[slempRobots]# -#[slhigRobots]#
#[slmedRobots]#
#[sllowRobots]# +#[slfilRobots]# #[slhittmissRobots]#
#[sluniqdoubRobots]#
#[slflushRobots]# #[ochunkmaxRobots]# #[ochunkcurRobots]# @@ -320,7 +309,6 @@ Increasing this cache may speed up the peer-ping. #[usedRobots]# #[dfltRobots]# -#[goodRobots]# #[bestRobots]# The robots.txt DB stores downloaded records from robots.txt files. Increasing this cache may speed up validation if crawling of the URL is allowed. @@ -329,9 +317,9 @@ Increasing this cache may speed up validation if crawling of the URL is allowed. Crawl Profiles #[slreqProfiles]# -#[chunkProfiles]# +#[chunkProfiles]# #[slempProfiles]# -#[slhigProfiles]#
#[slmedProfiles]#
#[sllowProfiles]# +#[slfilProfiles]# #[slhittmissProfiles]#
#[sluniqdoubProfiles]#
#[slflushProfiles]# #[ochunkmaxProfiles]# #[ochunkcurProfiles]# @@ -340,7 +328,6 @@ Increasing this cache may speed up validation if crawling of the URL is allowed. #[usedProfiles]# #[dfltProfiles]# -#[goodProfiles]# #[bestProfiles]# The profile database stores properties for each crawl that is started on the local peer. Increasing this cache may speed up crawling, but not much space is needed, so the effect may be low. diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 722eedafd..ed8997f10 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -62,11 +62,12 @@ public class PerformanceMemory_p { private static final long MB = 1024 * KB; private static Map defaultSettings = null; - private static int[] slt,chk; + private static int chk; + private static int[] slt; private static String[] ost; - private static long req, usd, bst, god; + private static long req, usd, bst; - private static long usedTotal, currTotal, dfltTotal, goodTotal, bestTotal; + private static long usedTotal, currTotal, dfltTotal, bestTotal; public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements @@ -160,7 +161,6 @@ public class PerformanceMemory_p { usedTotal = 0; currTotal = 0; dfltTotal = 0; - goodTotal = 0; bestTotal = 0; req = sb.wordIndex.size(); @@ -238,7 +238,6 @@ public class PerformanceMemory_p { prop.put("usedTotal", usedTotal / MB); prop.put("currTotal", currTotal / MB); prop.put("dfltTotal", dfltTotal / MB); - prop.put("goodTotal", goodTotal / MB); prop.put("bestTotal", bestTotal / MB); // parse initialization memory settings @@ -294,17 +293,13 @@ public class PerformanceMemory_p { } private static void putprop(serverObjects prop, serverSwitch env, String db, String set) { - usd = ((long) chk[0]) * ((long) slt[3]) + ((long) chk[1]) * ((long) slt[2]) + ((long) chk[2]) * ((long) slt[1]); - bst = (((((long) chk[2]) * ((long) req)) >> 10) 
+ 1) << 10; - god = (((((long) bst) / ((long) (1+slt[1]+slt[2]+slt[3])) * ((long) slt[1])) >> 10) + 1) << 10; - if (set.equals("setGood")) env.setConfig("ramCache" + db, god); + usd = ((long) chk) * ((long) slt[1]); + bst = (((((long) chk) * ((long) req)) >> 10) + 1) << 10; if (set.equals("setBest")) env.setConfig("ramCache" + db, bst); - prop.put("chunk" + db, chk[2] + "/" + chk[1] + "/" + chk[0]); + prop.put("chunk" + db, chk); prop.put("slreq" + db, req); - prop.put("slemp" + db, slt[0] - slt[1] - slt[2] - slt[3]); - prop.put("slhig" + db, slt[1]); - prop.put("slmed" + db, slt[2]); - prop.put("sllow" + db, slt[3]); + prop.put("slemp" + db, slt[0] - slt[1]); + prop.put("slfil" + db, slt[1]); prop.put("slhittmiss" + db, slt[4] + ":" + slt[5]); prop.put("sluniqdoub" + db, slt[6] + ":" + slt[7]); prop.put("slflush" + db, slt[8] + ":" + slt[9]); @@ -317,14 +312,12 @@ public class PerformanceMemory_p { prop.put("nuniqdoub" + db, ost[14] + ":" + ost[15]); prop.put("nflush" + db, ost[16] + ":" + ost[17]); prop.put("used" + db, usd / KB); - prop.put("good" + db, god / KB); prop.put("best" + db, bst / KB); prop.put("dflt" + db, Long.parseLong((String) defaultSettings.get("ramCache" + db)) / KB); prop.put("ramCache" + db, Long.parseLong(env.getConfig("ramCache" + db, "0")) / KB); usedTotal += usd; currTotal += Long.parseLong(env.getConfig("ramCache" + db, "0")); dfltTotal += Long.parseLong((String) defaultSettings.get("ramCache" + db)); - goodTotal += god; bestTotal += bst; } } diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index 761376341..f91ec62bc 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -97,7 +97,7 @@ public class blogBoard { return datbase.size(); } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return datbase.cacheNodeChunkSize(); } diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java index 
93fc724c2..26f389f25 100644 --- a/source/de/anomic/data/messageBoard.java +++ b/source/de/anomic/data/messageBoard.java @@ -86,7 +86,7 @@ public class messageBoard { return database.size(); } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return database.cacheNodeChunkSize(); } diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index c37d99dd0..a373e6ba9 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -92,7 +92,7 @@ public final class userDB { } } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return userTable.cacheNodeChunkSize(); } diff --git a/source/de/anomic/data/wikiBoard.java b/source/de/anomic/data/wikiBoard.java index 48a02d610..0cd4be517 100644 --- a/source/de/anomic/data/wikiBoard.java +++ b/source/de/anomic/data/wikiBoard.java @@ -100,14 +100,10 @@ public class wikiBoard { return datbase.size(); } - public int[] dbCacheNodeChunkSize() { - int[] db = datbase.cacheNodeChunkSize(); - int[] bk = bkpbase.cacheNodeChunkSize(); - int[] i = new int[3]; - i[kelondroRecords.CP_LOW] = (db[kelondroRecords.CP_LOW] + bk[kelondroRecords.CP_LOW]) / 2; - i[kelondroRecords.CP_MEDIUM] = (db[kelondroRecords.CP_MEDIUM] + bk[kelondroRecords.CP_MEDIUM]) / 2; - i[kelondroRecords.CP_HIGH] = (db[kelondroRecords.CP_HIGH] + bk[kelondroRecords.CP_HIGH]) / 2; - return i; + public int dbCacheNodeChunkSize() { + int db = datbase.cacheNodeChunkSize(); + int bk = bkpbase.cacheNodeChunkSize(); + return (db + bk) / 2; } public int[] dbCacheNodeStatus() { diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java index 8292fb2d1..d325043fd 100644 --- a/source/de/anomic/index/indexURL.java +++ b/source/de/anomic/index/indexURL.java @@ -430,7 +430,7 @@ public class indexURL { if (urlHashCache != null) urlHashCache.close(); } - public int[] cacheNodeChunkSize() { + public int cacheNodeChunkSize() { return 
urlHashCache.cacheNodeChunkSize(); } diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index ef05f3e49..1bb582e01 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -24,7 +24,7 @@ package de.anomic.kelondro; -public class kelondroIntBytesMap extends kelondroRowSet { +public class kelondroIntBytesMap extends kelondroRowBufferedSet { public kelondroIntBytesMap(int payloadSize, int initSize) { super(new kelondroRow(new int[]{4, payloadSize}), initSize); diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java index a4a84f972..e2b7555b4 100644 --- a/source/de/anomic/kelondro/kelondroMap.java +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -133,7 +133,7 @@ public class kelondroMap { return dyn.row().width(0); } - public int[] cacheNodeChunkSize() { + public int cacheNodeChunkSize() { return dyn.cacheNodeChunkSize(); } diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 4d9b3146d..1c1a4c7ad 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -71,7 +71,6 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; import java.util.HashSet; -import java.util.NoSuchElementException; import java.util.Random; import java.util.StringTokenizer; import java.util.Iterator; @@ -86,7 +85,6 @@ public class kelondroRecords { // memory calculation private static final int element_in_cache = 4; // for kelondroCollectionObjectMap: 4; for HashMap: 52 - private static final int cache_control_entry = 96; // caching flags public static final int CP_NONE = -1; // cache priority none; entry shall not be cached @@ -141,10 +139,8 @@ public class kelondroRecords { private int TXTPROPW; // size of a single TXTPROPS element // caching buffer - private 
kelondroIntBytesMap[] cacheHeaders; // the cache; holds overhead values and key element + private kelondroIntBytesMap cacheHeaders; // the cache; holds overhead values and key element private int cacheSize; // number of cache records - private long cacheStartup; // startup time; for cache aging - private kelondroMScoreCluster cacheScore; // controls cache aging private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush; // optional logger @@ -417,25 +413,11 @@ public class kelondroRecords { if (buffersize <= 0) { this.cacheSize = 0; this.cacheHeaders = null; - this.cacheScore = null; } else { - if ((buffersize / cacheNodeChunkSize(false)) > size()) { - this.cacheSize = (int) (buffersize / cacheNodeChunkSize(false)); - this.cacheScore = null; // no cache control because we have more cache slots than database entries - } else { - this.cacheSize = (int) (buffersize / cacheNodeChunkSize(true)); - this.cacheScore = new kelondroMScoreCluster(); // cache control of CP_HIGH caches - } - this.cacheHeaders = new kelondroIntBytesMap[]{ - new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4), - new kelondroIntBytesMap(this.headchunksize, 0), - new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4) - }; - this.cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0); - this.cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0); - this.cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0); + this.cacheSize = (int) (buffersize / cacheNodeChunkSize()); + this.cacheHeaders = new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4); + this.cacheHeaders.setOrdering(kelondroNaturalOrder.naturalOrder, 0); } - this.cacheStartup = System.currentTimeMillis(); this.readHit = 0; this.readMiss = 0; this.writeUnique = 0; @@ -457,29 +439,17 @@ public class kelondroRecords { return new File(filename); } - protected final int cacheNodeChunkSize(boolean cacheControl) { - return this.headchunksize + element_in_cache + 
((cacheControl) ? cache_control_entry : 0); - } - - public int[] cacheNodeChunkSize() { - // returns three integers: - // #0: chunk size of CP_LOW - priority entries - // #1: chunk size of CP_MEDIUM - priority entries - // #2: chunk size of CP_HIGH - priority entries - int[] i = new int[3]; - i[CP_LOW] = cacheNodeChunkSize(false); - i[CP_MEDIUM] = cacheNodeChunkSize(false); - i[CP_HIGH] = cacheNodeChunkSize(this.cacheScore != null); - return i; + public final int cacheNodeChunkSize() { + return this.headchunksize + element_in_cache; } public int[] cacheNodeStatus() { if (cacheHeaders == null) return new int[]{0,0,0,0,0,0,0,0,0,0}; return new int[]{ cacheSize, - cacheHeaders[CP_HIGH].size(), - cacheHeaders[CP_MEDIUM].size(), - cacheHeaders[CP_LOW].size(), + cacheHeaders.size(), + 0, // not used + 0, // not used readHit, readMiss, writeUnique, @@ -492,9 +462,7 @@ public class kelondroRecords { public String cacheNodeStatusString() { return "cacheMaxSize=" + cacheSize + - ", CP_HIGH=" + cacheHeaders[CP_HIGH].size() + - ", CP_MEDIUM=" + cacheHeaders[CP_MEDIUM].size() + - ", CP_LOW=" + cacheHeaders[CP_LOW].size() + + ", cacheCurrSize=" + cacheHeaders.size() + ", readHit=" + readHit + ", readMiss=" + readMiss + ", writeUnique=" + writeUnique + @@ -531,20 +499,7 @@ public class kelondroRecords { protected void deleteNode(Handle handle) throws IOException { if (cacheSize != 0) { synchronized (cacheHeaders) { - if (cacheScore == null) { - cacheHeaders[CP_LOW].removeb(handle.index); - cacheHeaders[CP_MEDIUM].removeb(handle.index); - cacheHeaders[CP_HIGH].removeb(handle.index); - } else if (cacheHeaders[CP_HIGH].getb(handle.index) != null) { - // remove handle from cache-control - cacheScore.deleteScore(handle); - cacheHeaders[CP_HIGH].removeb(handle.index); - } else { - // no cache control for medium-priority entries and - // no cache control for low-priority entries - cacheHeaders[CP_MEDIUM].removeb(handle.index); - cacheHeaders[CP_LOW].removeb(handle.index); - } + 
cacheHeaders.removeb(handle.index); cacheDelete++; } } @@ -657,15 +612,7 @@ public class kelondroRecords { } else synchronized(cacheHeaders) { byte[] cacheEntry = null; int cp = CP_HIGH; - cacheEntry = cacheHeaders[CP_HIGH].getb(this.handle.index); // first try - if (cacheEntry == null) { - cacheEntry = cacheHeaders[CP_MEDIUM].getb(this.handle.index); // second try - cp = CP_MEDIUM; - } - if (cacheEntry == null) { - cacheEntry = cacheHeaders[CP_LOW].getb(this.handle.index); // third try - cp = CP_LOW; - } + cacheEntry = cacheHeaders.getb(this.handle.index); if (cacheEntry == null) { // cache miss, we read overhead and key from file readMiss++; @@ -694,10 +641,6 @@ public class kelondroRecords { //this.headChunk = new byte[headchunksize]; //System.arraycopy(cacheEntry, 0, this.headChunk, 0, headchunksize); this.headChunk = cacheEntry; - // update cache scores to announce this cache hit - if ((cacheScore != null) && (cp == CP_HIGH)) { - cacheScore.setScore(this.handle, (int) ((System.currentTimeMillis() - cacheStartup) / 1000)); - } this.headChanged = false; } } @@ -855,90 +798,27 @@ public class kelondroRecords { } private void update2Cache(int forPriority) { - if (cacheSize > 0) { - cacheHeaders[CP_LOW].removeb(this.handle.index); - cacheHeaders[CP_MEDIUM].removeb(this.handle.index); - cacheHeaders[CP_HIGH].removeb(this.handle.index); - } - if (cacheSpace(forPriority)) updateNodeCache(forPriority); + if (cacheSpace()) updateNodeCache(forPriority); } - private boolean cacheSpace(int forPriority) { + private boolean cacheSpace() { // check for space in cache // should be only called within a synchronized(XcacheHeaders) environment // returns true if it is allowed to add another entry to the cache // returns false if the cache is considered to be full - if (forPriority == CP_NONE) return false; if (cacheSize == 0) return false; // no caching - long cs = cacheHeaders[CP_LOW].size() + cacheHeaders[CP_MEDIUM].size() + cacheHeaders[CP_HIGH].size(); - if (cs == 0) return 
true; // nothing there to flush - if ((cs < cacheSize) && (availableMemory() >= memBlock)) return true; // no need to flush cache space + if (cacheHeaders.size() == 0) return true; // nothing there to flush + if ((cacheHeaders.size() < cacheSize) && (availableMemory() >= memBlock)) return true; // no need to flush cache space // delete one entry. distinguish between different priority cases: - if (forPriority == CP_LOW) { - // remove only from low-priority cache - if (cacheHeaders[CP_LOW].size() != 0) { - // just delete any of the low-priority entries - cacheHeaders[CP_LOW].removeOne(); - cacheFlush++; - return true; - } else { - // we cannot delete any entry, therefore there is no space for another entry - return false; - } - } else if (forPriority == CP_MEDIUM) { - if (cacheHeaders[CP_LOW].size() != 0) { - // just delete any of the low-priority entries - cacheHeaders[CP_LOW].removeOne(); + if (cacheHeaders.size() != 0) { + // just delete any of the entries + cacheHeaders.removeOne(); cacheFlush++; return true; - } else if (cacheHeaders[CP_MEDIUM].size() != 0) { - // just delete any of the medium-priority entries - cacheHeaders[CP_MEDIUM].removeOne(); - cacheFlush++; - return true; - } else { - // we cannot delete any entry, therefore there is no space for another entry - return false; - } } else { - // request for a high-priority entry - if (cacheHeaders[CP_LOW].size() != 0) { - // just delete any of the low-priority entries - cacheHeaders[CP_LOW].removeOne(); - cacheFlush++; - return true; - } else if (cacheHeaders[CP_MEDIUM].size() != 0) { - // just delete any of the medium-priority entries - cacheHeaders[CP_MEDIUM].removeOne(); - cacheFlush++; - return true; - } else if (cacheScore == null) { - // no cache-control of high-priority cache - // the cache is considered as full + // we cannot delete any entry, therefore there is no space for another entry return false; - } else try { - // delete one from the high-priority entries - // use the cache-control to find 
the right object - Handle delkey = (Handle) cacheScore.getMinObject(); - cacheScore.deleteScore(delkey); - cacheHeaders[CP_HIGH].removeb(delkey.index); - cacheFlush++; - return true; - } catch (NoSuchElementException e) { - // this is a strange error and could be caused by internal java problems - // we simply clear the cache - String error = "cachScore error: " + e.getMessage() + "; cachesize=" + cacheSize + ", cache.size()=[" + cacheHeaders[0].size() + "," + cacheHeaders[1].size() + "," + cacheHeaders[2].size() + "], cacheScore.size()=" + cacheScore.size(); - cacheScore = new kelondroMScoreCluster(); - cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, cacheSize / 4); - cacheHeaders[CP_MEDIUM] = new kelondroIntBytesMap(headchunksize, 0); - cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, cacheSize / 4); - cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0); - cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0); - cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0); - throw new kelondroException(filename, error); - - } } } @@ -947,8 +827,7 @@ public class kelondroRecords { if (this.headChunk == null) return; // nothing there to cache if (priority == CP_NONE) return; // it is not wanted that this shall be cached if (cacheSize == 0) return; // we do not use the cache - int cs = cacheHeaders[CP_LOW].size() + cacheHeaders[CP_MEDIUM].size() + cacheHeaders[CP_HIGH].size(); - if (cs >= cacheSize) return; // no cache update if cache is full + if (cacheHeaders.size() >= cacheSize) return; // no cache update if cache is full synchronized (cacheHeaders) { // generate cache entry @@ -958,13 +837,7 @@ public class kelondroRecords { // store the cache entry boolean upd = false; - if (priority != CP_LOW) upd = upd || (cacheHeaders[CP_LOW].removeb(cacheHandle.index) != null); - if (priority != CP_MEDIUM) upd = upd || (cacheHeaders[CP_MEDIUM].removeb(cacheHandle.index) != null); - if (priority != CP_HIGH) upd = upd 
|| (cacheHeaders[CP_HIGH].removeb(cacheHandle.index) != null); - cacheHeaders[priority].putb(cacheHandle.index, headChunk); - if ((cacheScore != null) && (priority == CP_HIGH)) { - cacheScore.setScore(cacheHandle, (int) ((System.currentTimeMillis() - cacheStartup) / 1000)); - } + cacheHeaders.putb(cacheHandle.index, headChunk); if (upd) writeDouble++; else writeUnique++; // delete the cache entry buffer @@ -991,15 +864,15 @@ public class kelondroRecords { System.out.println(); } } else { - System.out.println("### cache report: [" + cacheHeaders[0].size() + "," + cacheHeaders[0].size() + "," + cacheHeaders[0].size() + "] entries"); - for (int cp = 0; cp < 3; cp++) { - Iterator i = cacheHeaders[cp].elements(); + System.out.println("### cache report: " + cacheHeaders.size() + " entries"); + + Iterator i = cacheHeaders.elements(); byte[] entry; while (i.hasNext()) { entry = (byte[]) i.next(); // print from cache - System.out.print("#C " + cp + " "); + System.out.print("#C "); printChunk((byte[]) entry); System.out.println(); @@ -1013,7 +886,6 @@ public class kelondroRecords { */ System.out.println(); } - } } System.out.println("### end report"); } @@ -1374,11 +1246,7 @@ public class kelondroRecords { public kelondroProfile[] profiles() { return new kelondroProfile[]{ (cacheHeaders == null) ? 
new kelondroProfile() : - kelondroProfile.consolidate(new kelondroProfile[]{ - cacheHeaders[0].profile(), - cacheHeaders[1].profile(), - cacheHeaders[2].profile() - }), + cacheHeaders.profile(), entryFile.profile() }; } diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java index 174c88662..b3bed9232 100644 --- a/source/de/anomic/kelondro/kelondroStack.java +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -99,7 +99,7 @@ public final class kelondroStack extends kelondroRecords { public static kelondroStack reset(kelondroStack stack) { // memorize settings to this file File f = new File(stack.filename); - long bz = stack.cacheNodeStatus()[0] * stack.cacheNodeChunkSize(true); + long bz = stack.cacheNodeStatus()[0] * stack.cacheNodeChunkSize(); kelondroRow row = stack.row(); // close and delete the file diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index db28cfcc2..e5e10b752 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -576,7 +576,7 @@ public final class plasmaCrawlLURL extends indexURL { kelondroBase64Order.enhancedCoder.encodeLong(wordCount, urlWordCountLength).getBytes(), }; urlHashCache.put(urlHashCache.row().newEntry(entry)); - serverLog.logFine("PLASMA","STORED new LURL " + url.toString()); + //serverLog.logFine("PLASMA","STORED new LURL " + url.toString()); this.stored = true; } catch (Exception e) { serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e); diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java index c5f0050a1..a68c29b04 100644 --- a/source/de/anomic/plasma/plasmaCrawlProfile.java +++ b/source/de/anomic/plasma/plasmaCrawlProfile.java @@ -78,7 +78,7 @@ public class plasmaCrawlProfile { domsCache = new HashMap(); } - public int[] dbCacheNodeChunkSize() { + public int 
dbCacheNodeChunkSize() { return profileTable.cacheNodeChunkSize(); } diff --git a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java index 75e25a505..3e168fd0f 100644 --- a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java +++ b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java @@ -85,7 +85,7 @@ public class plasmaCrawlRobotsTxt { } } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return robotsTable.cacheNodeChunkSize(); } diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index da77e5430..1d3c2e895 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -203,7 +203,7 @@ public final class plasmaHTCache { return this.responseHeaderDB.size(); } - public int[] dbCacheChunkSize() { + public int dbCacheChunkSize() { return this.responseHeaderDB.cacheNodeChunkSize(); } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 302b9f710..18f5c3742 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -808,7 +808,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // work off unwritten files if (entry.cacheArray == null) { - this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile); + //this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile); } else { String error = entry.shallStoreCacheForProxy(); if (error == null) { diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 0afb5ebd2..927fb7264 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -133,7 +133,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { return 
assortmentCluster.sizes(); } - public int[] assortmentsCacheChunkSizeAvg() { + public int assortmentsCacheChunkSizeAvg() { return assortmentCluster.cacheChunkSizeAvg(); } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index a728016c4..b71823959 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -264,7 +264,7 @@ public final class plasmaWordIndexAssortment { return assortments.size(); } - public int[] cacheNodeChunkSize() { + public int cacheNodeChunkSize() { return assortments.cacheNodeChunkSize(); } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 1fed05ef3..4f22c5785 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -290,19 +290,14 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl return sizes; } - public int[] cacheChunkSizeAvg() { - int[] i = new int[]{0, 0, 0}; - int[] a = new int[3]; + public int cacheChunkSizeAvg() { + int i = 0; + int a; for (int j = 0; j < clusterCount; j++) { a = assortments[j].cacheNodeChunkSize(); - i[kelondroRecords.CP_LOW] += a[kelondroRecords.CP_LOW]; - i[kelondroRecords.CP_MEDIUM] += a[kelondroRecords.CP_MEDIUM]; - i[kelondroRecords.CP_HIGH] += a[kelondroRecords.CP_HIGH]; + i += a; } - a[kelondroRecords.CP_LOW] = i[kelondroRecords.CP_LOW] / clusterCount; - a[kelondroRecords.CP_MEDIUM] = i[kelondroRecords.CP_MEDIUM] / clusterCount; - a[kelondroRecords.CP_HIGH] = i[kelondroRecords.CP_HIGH] / clusterCount; - return a; + return i / clusterCount; } public int[] cacheNodeStatus() { diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index 04cc0a8af..8d5333d67 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ 
b/source/de/anomic/yacy/yacyNewsDB.java @@ -100,7 +100,7 @@ public class yacyNewsDB { news = createDB(path, bufferkb); } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return news.cacheNodeChunkSize(); } diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 795e6e8a9..4ac5b6ef8 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -106,7 +106,7 @@ public class yacyNewsPool { return newsDB.size(); } - public int[] dbCacheNodeChunkSize() { + public int dbCacheNodeChunkSize() { return newsDB.dbCacheNodeChunkSize(); } diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index e2316bdbf..ffe248a0d 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -170,15 +170,12 @@ public final class yacySeedDB { } catch (IOException e) {} } - public int[] dbCacheNodeChunkSize() { - int[] ac = seedActiveDB.cacheNodeChunkSize(); - int[] pa = seedPassiveDB.cacheNodeChunkSize(); - int[] po = seedPotentialDB.cacheNodeChunkSize(); - int[] i = new int[3]; - i[kelondroRecords.CP_LOW] = (ac[kelondroRecords.CP_LOW] + pa[kelondroRecords.CP_LOW] + po[kelondroRecords.CP_LOW]) / 3; - i[kelondroRecords.CP_MEDIUM] = (ac[kelondroRecords.CP_MEDIUM] + pa[kelondroRecords.CP_MEDIUM] + po[kelondroRecords.CP_MEDIUM]) / 3; - i[kelondroRecords.CP_HIGH] = (ac[kelondroRecords.CP_HIGH] + pa[kelondroRecords.CP_HIGH] + po[kelondroRecords.CP_HIGH]) / 3; - return i; + public int dbCacheNodeChunkSize() { + int ac = seedActiveDB.cacheNodeChunkSize(); + int pa = seedPassiveDB.cacheNodeChunkSize(); + int po = seedPotentialDB.cacheNodeChunkSize(); + + return (ac+ pa + po) / 3; } public int[] dbCacheNodeStatus() {