removed high/med/low from kelondroRecords cache control.

this was done because testing showed that cache-delete operations
slowed down record access the most, even more than actual IO operations.
Cache-delete operations appeared when entries were shifted from low-priority
positions to high-priority positions. During a fill of x entries into a database,
x/2 delete situations happened, each causing two or more delete operations.
removing the cache control means that these delete operations are no longer
necessary, but it becomes more difficult to decide which cache elements
shall be removed when the cache is full. There is not yet a stable
solution for this case, but the advantage of a faster cache outweighs
the flush problem.
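
For illustration, a minimal sketch of the new eviction strategy (a hypothetical class, not the actual kelondro code): the flat cache needs only a single put per write and, when full, evicts an arbitrary entry, while the old tiered cache had to remove a handle from up to three priority maps whenever an entry shifted position.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

// hypothetical flat node cache mirroring the idea of this commit:
// no priority tiers, no cache-control scores; eviction picks any entry
class FlatNodeCache {
    private final Map<Integer, byte[]> headers = new HashMap<Integer, byte[]>();
    private final int maxSize;

    FlatNodeCache(int maxSize) { this.maxSize = maxSize; }

    void put(int handle, byte[] headChunk) {
        if (headers.size() >= maxSize) {
            // flush: remove any one entry; cheap compared to the old shift
            // between low/med/high maps, which required several deletes
            Iterator<Integer> it = headers.keySet().iterator();
            if (it.hasNext()) { it.next(); it.remove(); }
        }
        headers.put(handle, headChunk);
    }

    byte[] get(int handle) { return headers.get(handle); }
}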

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2244 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 6866bc2758
commit 66964dc015

@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.454
releaseVersion=0.455
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@@ -83,14 +83,14 @@
<td class="small" rowspan="2">DB&nbsp;Size<br>&nbsp;</td>
<td class="small" colspan="4">Node Cache</td>
<td class="small" colspan="4">Object Cache</td>
<td class="small" colspan="5">Memory Occupation (KBytes)</td>
<td class="small" colspan="4">Memory Occupation (KBytes)</td>
<td class="small" rowspan="2">Description<br>&nbsp;</td>
</tr>
<tr class="TableHeader" valign="bottom">
<td class="small">Chunk Size<br>high/med/low<br>(bytes)</td>
<td class="small">Chunk<br>Size<br>(bytes)</td>
<td class="small">Empty<br>(avail.)<br>Slots</td>
<td class="small">Used: High, Medium, Low Prio</td>
<td class="small">Used<br>(filled)<br>Slots</td>
<td class="small">Node-Cache<br>Hit:Miss<br>Uniq:Doub<br>Del:Flush<br></td>
<td class="small">&nbsp;<br>Max<br>Size</td>
<td class="small">&nbsp;<br>Hit-Size<br>Miss-Size</td>
@@ -99,16 +99,15 @@
<td class="small">Used Now</td>
<td class="small">Assigned Max</td>
<td class="small">Default Max</td>
<td class="small">Good Max</td>
<td class="small">Best Max</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">RWI Assortment Cluster</td>
<td class="small" align="right">#[slreqRWI]#</td>
<td class="small" align="center">#[chunkRWI]#</td>
<td class="small" align="right">#[chunkRWI]#</td>
<td class="small" align="right">#[slempRWI]#</td>
<td class="small" align="right">#[slhigRWI]#<br>#[slmedRWI]#<br>#[sllowRWI]#</td>
<td class="small" align="right">#[slfilRWI]#</td>
<td class="small" align="right">#[slhittmissRWI]#<br>#[sluniqdoubRWI]#<br>#[slflushRWI]#</td>
<td class="small" align="right">#[ochunkmaxRWI]#</td>
<td class="small" align="right">#[ochunkcurRWI]#</td>
@@ -117,7 +116,6 @@
<td class="small" align="right">#[usedRWI]#</td>
<td class="small" align="right"><input name="ramCacheRWI" type="text" align="right" size="6" maxlength="7" value="#[ramCacheRWI]#"></td>
<td class="small" align="right">#[dfltRWI]#</td>
<td class="small" align="right">#[goodRWI]#</td>
<td class="small" align="right">#[bestRWI]#</td>
<td class="small" align="left">The Assortment Cluster stores most of the page indexes.
Flushing speed of the temporary RWI cache depends on the size of this file cache. Increasing the space of this
@@ -127,9 +125,9 @@ cache will speed up crawls with a depth > 3.</td>
<tr class="TableCellDark">
<td class="small" align="left">HTTP Response Header</td>
<td class="small" align="right">#[slreqHTTP]#</td>
<td class="small" align="center">#[chunkHTTP]#</td>
<td class="small" align="right">#[chunkHTTP]#</td>
<td class="small" align="right">#[slempHTTP]#</td>
<td class="small" align="right">#[slhigHTTP]#<br>#[slmedHTTP]#<br>#[sllowHTTP]#</td>
<td class="small" align="right">#[slfilHTTP]#</td>
<td class="small" align="right">#[slhittmissHTTP]#<br>#[sluniqdoubHTTP]#<br>#[slflushHTTP]#</td>
<td class="small" align="right">#[ochunkmaxHTTP]#</td>
<td class="small" align="right">#[ochunkcurHTTP]#</td>
@@ -138,7 +136,6 @@ cache will speed up crawls with a depth > 3.</td>
<td class="small" align="right">#[usedHTTP]#</td>
<td class="small" align="right"><input name="ramCacheHTTP" type="text" align="right" size="6" maxlength="7" value="#[ramCacheHTTP]#"></td>
<td class="small" align="right">#[dfltHTTP]#</td>
<td class="small" align="right">#[goodHTTP]#</td>
<td class="small" align="right">#[bestHTTP]#</td>
<td class="small" align="left">The Response Header database stores the HTTP heades that other servers send when YaCy retrieves web pages
during proxy mode, when performing crawls or if it fetches pages for snippet generation.
@@ -148,9 +145,9 @@ Increasing this cache will be most important for a fast proxy mode.</td>
<tr class="TableCellDark">
<td class="small" align="left">'loaded' URLs</td>
<td class="small" align="right">#[slreqLURL]#</td>
<td class="small" align="center">#[chunkLURL]#</td>
<td class="small" align="right">#[chunkLURL]#</td>
<td class="small" align="right">#[slempLURL]#</td>
<td class="small" align="right">#[slhigLURL]#<br>#[slmedLURL]#<br>#[sllowLURL]#</td>
<td class="small" align="right">#[slfilLURL]#</td>
<td class="small" align="right">#[slhittmissLURL]#<br>#[sluniqdoubLURL]#<br>#[slflushLURL]#</td>
<td class="small" align="right">#[ochunkmaxLURL]#</td>
<td class="small" align="right">#[ochunkcurLURL]#</td>
@@ -159,7 +156,6 @@ Increasing this cache will be most important for a fast proxy mode.</td>
<td class="small" align="right">#[usedLURL]#</td>
<td class="small" align="right"><input name="ramCacheLURL" type="text" align="right" size="6" maxlength="7" value="#[ramCacheLURL]#"></td>
<td class="small" align="right">#[dfltLURL]#</td>
<td class="small" align="right">#[goodLURL]#</td>
<td class="small" align="right">#[bestLURL]#</td>
<td class="small" align="left">This is the database that holds the hash/url - relation and properties regarding the url like load date and server date.
This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer.</td>
@@ -168,9 +164,9 @@ This cache is very important for a fast search process. Increasing the cache siz
<tr class="TableCellDark">
<td class="small" align="left">'noticed' URLs</td>
<td class="small" align="right">#[slreqNURL]#</td>
<td class="small" align="center">#[chunkNURL]#</td>
<td class="small" align="right">#[chunkNURL]#</td>
<td class="small" align="right">#[slempNURL]#</td>
<td class="small" align="right">#[slhigNURL]#<br>#[slmedNURL]#<br>#[sllowNURL]#</td>
<td class="small" align="right">#[slfilNURL]#</td>
<td class="small" align="right">#[slhittmissNURL]#<br>#[sluniqdoubNURL]#<br>#[slflushNURL]#</td>
<td class="small" align="right">#[ochunkmaxNURL]#</td>
<td class="small" align="right">#[ochunkcurNURL]#</td>
@@ -179,7 +175,6 @@ This cache is very important for a fast search process. Increasing the cache siz
<td class="small" align="right">#[usedNURL]#</td>
<td class="small" align="right"><input name="ramCacheNURL" type="text" align="right" size="6" maxlength="7" value="#[ramCacheNURL]#"></td>
<td class="small" align="right">#[dfltNURL]#</td>
<td class="small" align="right">#[goodNURL]#</td>
<td class="small" align="right">#[bestNURL]#</td>
<td class="small" align="left">A noticed URL is one that was discovered during crawling but was not loaded yet.
Increasing the cache size will result in faster double-check during URL recognition when doing crawls.</td>
@@ -188,9 +183,9 @@ Increasing the cache size will result in faster double-check during URL recognit
<tr class="TableCellDark">
<td class="small" align="left">'error' URLs</td>
<td class="small" align="right">#[slreqEURL]#</td>
<td class="small" align="center">#[chunkEURL]#</td>
<td class="small" align="right">#[chunkEURL]#</td>
<td class="small" align="right">#[slempEURL]#</td>
<td class="small" align="right">#[slhigEURL]#<br>#[slmedEURL]#<br>#[sllowEURL]#</td>
<td class="small" align="right">#[slfilEURL]#</td>
<td class="small" align="right">#[slhittmissEURL]#<br>#[sluniqdoubEURL]#<br>#[slflushEURL]#</td>
<td class="small" align="right">#[ochunkmaxEURL]#</td>
<td class="small" align="right">#[ochunkcurEURL]#</td>
@@ -199,7 +194,6 @@ Increasing the cache size will result in faster double-check during URL recognit
<td class="small" align="right">#[usedEURL]#</td>
<td class="small" align="right"><input name="ramCacheEURL" type="text" align="right" size="6" maxlength="7" value="#[ramCacheEURL]#"></td>
<td class="small" align="right">#[dfltEURL]#</td>
<td class="small" align="right">#[goodEURL]#</td>
<td class="small" align="right">#[bestEURL]#</td>
<td class="small" align="left">URLs that cannot be loaded are stored in this database. It is also used for double-checked during crawling.
Increasing the cache size will most probably speed up crawling slightly, but not significantly.</td>
@@ -208,9 +202,9 @@ Increasing the cache size will most probably speed up crawling slightly, but not
<tr class="TableCellDark">
<td class="small" align="left">DHT Control</td>
<td class="small" align="right">#[slreqDHT]#</td>
<td class="small" align="center">#[chunkDHT]#</td>
<td class="small" align="right">#[chunkDHT]#</td>
<td class="small" align="right">#[slempDHT]#</td>
<td class="small" align="right">#[slhigDHT]#<br>#[slmedDHT]#<br>#[sllowDHT]#</td>
<td class="small" align="right">#[slfilDHT]#</td>
<td class="small" align="right">#[slhittmissDHT]#<br>#[sluniqdoubDHT]#<br>#[slflushDHT]#</td>
<td class="small" align="right">#[ochunkmaxDHT]#</td>
<td class="small" align="right">#[ochunkcurDHT]#</td>
@@ -219,7 +213,6 @@ Increasing the cache size will most probably speed up crawling slightly, but not
<td class="small" align="right">#[usedDHT]#</td>
<td class="small" align="right"><input name="ramCacheDHT" type="text" align="right" size="6" maxlength="7" value="#[ramCacheDHT]#"></td>
<td class="small" align="right">#[dfltDHT]#</td>
<td class="small" align="right">#[goodDHT]#</td>
<td class="small" align="right">#[bestDHT]#</td>
<td class="small" align="left">This is simply the cache for the seed-dbs (active, passive, potential).
This cache is divided into three equal parts.
@@ -229,9 +222,9 @@ Increasing this cache may speed up many functions, but we need to test this to s
<tr class="TableCellDark">
<td class="small" align="left">Messages</td>
<td class="small" align="right">#[slreqMessage]#</td>
<td class="small" align="center">#[chunkMessage]#</td>
<td class="small" align="right">#[chunkMessage]#</td>
<td class="small" align="right">#[slempMessage]#</td>
<td class="small" align="right">#[slhigMessage]#<br>#[slmedMessage]#<br>#[sllowMessage]#</td>
<td class="small" align="right">#[slfilMessage]#</td>
<td class="small" align="right">#[slhittmissMessage]#<br>#[sluniqdoubMessage]#<br>#[slflushMessage]#</td>
<td class="small" align="right">#[ochunkmaxMessage]#</td>
<td class="small" align="right">#[ochunkcurMessage]#</td>
@@ -240,7 +233,6 @@ Increasing this cache may speed up many functions, but we need to test this to s
<td class="small" align="right">#[usedMessage]#</td>
<td class="small" align="right"><input name="ramCacheMessage" type="text" align="right" size="6" maxlength="7" value="#[ramCacheMessage]#"></td>
<td class="small" align="right">#[dfltMessage]#</td>
<td class="small" align="right">#[goodMessage]#</td>
<td class="small" align="right">#[bestMessage]#</td>
<td class="small" align="left">The Message cache for peer-to-peer messages. Less important.</td>
</tr>
@@ -248,9 +240,9 @@ Increasing this cache may speed up many functions, but we need to test this to s
<tr class="TableCellDark">
<td class="small" align="left">Wiki</td>
<td class="small" align="right">#[slreqWiki]#</td>
<td class="small" align="center">#[chunkWiki]#</td>
<td class="small" align="right">#[chunkWiki]#</td>
<td class="small" align="right">#[slempWiki]#</td>
<td class="small" align="right">#[slhigWiki]#<br>#[slmedWiki]#<br>#[sllowWiki]#</td>
<td class="small" align="right">#[slfilWiki]#</td>
<td class="small" align="right">#[slhittmissWiki]#<br>#[sluniqdoubWiki]#<br>#[slflushWiki]#</td>
<td class="small" align="right">#[ochunkmaxWiki]#</td>
<td class="small" align="right">#[ochunkcurWiki]#</td>
@@ -259,7 +251,6 @@ Increasing this cache may speed up many functions, but we need to test this to s
<td class="small" align="right">#[usedWiki]#</td>
<td class="small" align="right"><input name="ramCacheWiki" type="text" align="right" size="6" maxlength="7" value="#[ramCacheWiki]#"></td>
<td class="small" align="right">#[dfltWiki]#</td>
<td class="small" align="right">#[goodWiki]#</td>
<td class="small" align="right">#[bestWiki]#</td>
<td class="small" align="left">The YaCy-Wiki uses a database to store its pages.
This cache is divided in two parts, one for the wiki database and one for its backup.
@@ -269,9 +260,9 @@ Increasing this cache may speed up access to the wiki pages.</td>
<tr class="TableCellDark">
<td class="small" align="left">Blog</td>
<td class="small" align="right">#[slreqBlog]#</td>
<td class="small" align="center">#[chunkBlog]#</td>
<td class="small" align="right">#[chunkBlog]#</td>
<td class="small" align="right">#[slempBlog]#</td>
<td class="small" align="right">#[slhigBlog]#<br>#[slmedBlog]#<br>#[sllowBlog]#</td>
<td class="small" align="right">#[slfilBlog]#</td>
<td class="small" align="right">#[slhittmissBlog]#<br>#[sluniqdoubBlog]#<br>#[slflushBlog]#</td>
<td class="small" align="right">#[ochunkmaxBlog]#</td>
<td class="small" align="right">#[ochunkcurBlog]#</td>
@@ -280,7 +271,6 @@ Increasing this cache may speed up access to the wiki pages.</td>
<td class="small" align="right">#[usedBlog]#</td>
<td class="small" align="right"><input name="ramCacheBlog" type="text" align="right" size="6" maxlength="7" value="#[ramCacheBlog]#"></td>
<td class="small" align="right">#[dfltBlog]#</td>
<td class="small" align="right">#[goodBlog]#</td>
<td class="small" align="right">#[bestBlog]#</td>
<td class="small" align="left">The YaCy-Blog uses a database to store its entries.
Increasing this cache may speed up access to the Blog.</td>
@@ -289,9 +279,9 @@ Increasing this cache may speed up access to the Blog.</td>
<tr class="TableCellDark">
<td class="small" align="left">News</td>
<td class="small" align="right">#[slreqNews]#</td>
<td class="small" align="center">#[chunkNews]#</td>
<td class="small" align="right">#[chunkNews]#</td>
<td class="small" align="right">#[slempNews]#</td>
<td class="small" align="right">#[slhigNews]#<br>#[slmedNews]#<br>#[sllowNews]#</td>
<td class="small" align="right">#[slfilNews]#</td>
<td class="small" align="right">#[slhittmissNews]#<br>#[sluniqdoubNews]#<br>#[slflushNews]#</td>
<td class="small" align="right">#[ochunkmaxNews]#</td>
<td class="small" align="right">#[ochunkcurNews]#</td>
@@ -300,7 +290,6 @@ Increasing this cache may speed up access to the Blog.</td>
<td class="small" align="right">#[usedNews]#</td>
<td class="small" align="right"><input name="ramCacheNews" type="text" align="right" size="6" maxlength="7" value="#[ramCacheNews]#"></td>
<td class="small" align="right">#[dfltNews]#</td>
<td class="small" align="right">#[goodNews]#</td>
<td class="small" align="right">#[bestNews]#</td>
<td class="small" align="left">The News-DB stores property-lists for news that are included in seeds.
Increasing this cache may speed up the peer-ping.</td>
@@ -309,9 +298,9 @@ Increasing this cache may speed up the peer-ping.</td>
<tr class="TableCellDark">
<td class="small" align="left">robots.txt DB</td>
<td class="small" align="right">#[slreqRobots]#</td>
<td class="small" align="center">#[chunkRobots]#</td>
<td class="small" align="right">#[chunkRobots]#</td>
<td class="small" align="right">#[slempRobots]#</td>
<td class="small" align="right">#[slhigRobots]#<br>#[slmedRobots]#<br>#[sllowRobots]#</td>
<td class="small" align="right">#[slfilRobots]#</td>
<td class="small" align="right">#[slhittmissRobots]#<br>#[sluniqdoubRobots]#<br>#[slflushRobots]#</td>
<td class="small" align="right">#[ochunkmaxRobots]#</td>
<td class="small" align="right">#[ochunkcurRobots]#</td>
@@ -320,7 +309,6 @@ Increasing this cache may speed up the peer-ping.</td>
<td class="small" align="right">#[usedRobots]#</td>
<td class="small" align="right"><input name="ramCacheRobots" type="text" align="right" size="6" maxlength="7" value="#[ramCacheRobots]#"></td>
<td class="small" align="right">#[dfltRobots]#</td>
<td class="small" align="right">#[goodRobots]#</td>
<td class="small" align="right">#[bestRobots]#</td>
<td class="small" align="left">The robots.txt DB stores downloaded records from robots.txt files.
Increasing this cache may speed up validation if crawling of the URL is allowed.</td>
@@ -329,9 +317,9 @@ Increasing this cache may speed up validation if crawling of the URL is allowed.
<tr class="TableCellDark">
<td class="small" align="left">Crawl Profiles</td>
<td class="small" align="right">#[slreqProfiles]#</td>
<td class="small" align="center">#[chunkProfiles]#</td>
<td class="small" align="right">#[chunkProfiles]#</td>
<td class="small" align="right">#[slempProfiles]#</td>
<td class="small" align="right">#[slhigProfiles]#<br>#[slmedProfiles]#<br>#[sllowProfiles]#</td>
<td class="small" align="right">#[slfilProfiles]#</td>
<td class="small" align="right">#[slhittmissProfiles]#<br>#[sluniqdoubProfiles]#<br>#[slflushProfiles]#</td>
<td class="small" align="right">#[ochunkmaxProfiles]#</td>
<td class="small" align="right">#[ochunkcurProfiles]#</td>
@@ -340,7 +328,6 @@ Increasing this cache may speed up validation if crawling of the URL is allowed.
<td class="small" align="right">#[usedProfiles]#</td>
<td class="small" align="right"><input name="ramCacheProfiles" type="text" align="right" size="6" maxlength="7" value="#[ramCacheProfiles]#"></td>
<td class="small" align="right">#[dfltProfiles]#</td>
<td class="small" align="right">#[goodProfiles]#</td>
<td class="small" align="right">#[bestProfiles]#</td>
<td class="small" align="left">The profile database stores properties for each crawl that is started on the local peer.
Increasing this cache may speed up crawling, but not much space is needed, so the effect may be low.</td>

@@ -62,11 +62,12 @@ public class PerformanceMemory_p {
private static final long MB = 1024 * KB;
private static Map defaultSettings = null;
private static int[] slt,chk;
private static int chk;
private static int[] slt;
private static String[] ost;
private static long req, usd, bst, god;
private static long req, usd, bst;
private static long usedTotal, currTotal, dfltTotal, goodTotal, bestTotal;
private static long usedTotal, currTotal, dfltTotal, bestTotal;
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
@@ -160,7 +161,6 @@ public class PerformanceMemory_p {
usedTotal = 0;
currTotal = 0;
dfltTotal = 0;
goodTotal = 0;
bestTotal = 0;
req = sb.wordIndex.size();
@@ -238,7 +238,6 @@ public class PerformanceMemory_p {
prop.put("usedTotal", usedTotal / MB);
prop.put("currTotal", currTotal / MB);
prop.put("dfltTotal", dfltTotal / MB);
prop.put("goodTotal", goodTotal / MB);
prop.put("bestTotal", bestTotal / MB);
// parse initialization memory settings
@@ -294,17 +293,13 @@ public class PerformanceMemory_p {
}
private static void putprop(serverObjects prop, serverSwitch env, String db, String set) {
usd = ((long) chk[0]) * ((long) slt[3]) + ((long) chk[1]) * ((long) slt[2]) + ((long) chk[2]) * ((long) slt[1]);
bst = (((((long) chk[2]) * ((long) req)) >> 10) + 1) << 10;
god = (((((long) bst) / ((long) (1+slt[1]+slt[2]+slt[3])) * ((long) slt[1])) >> 10) + 1) << 10;
if (set.equals("setGood")) env.setConfig("ramCache" + db, god);
usd = ((long) chk) * ((long) slt[1]);
bst = (((((long) chk) * ((long) req)) >> 10) + 1) << 10;
if (set.equals("setBest")) env.setConfig("ramCache" + db, bst);
prop.put("chunk" + db, chk[2] + "/" + chk[1] + "/" + chk[0]);
prop.put("chunk" + db, chk);
prop.put("slreq" + db, req);
prop.put("slemp" + db, slt[0] - slt[1] - slt[2] - slt[3]);
prop.put("slhig" + db, slt[1]);
prop.put("slmed" + db, slt[2]);
prop.put("sllow" + db, slt[3]);
prop.put("slemp" + db, slt[0] - slt[1]);
prop.put("slfil" + db, slt[1]);
prop.put("slhittmiss" + db, slt[4] + ":" + slt[5]);
prop.put("sluniqdoub" + db, slt[6] + ":" + slt[7]);
prop.put("slflush" + db, slt[8] + ":" + slt[9]);
@@ -317,14 +312,12 @@ public class PerformanceMemory_p {
prop.put("nuniqdoub" + db, ost[14] + ":" + ost[15]);
prop.put("nflush" + db, ost[16] + ":" + ost[17]);
prop.put("used" + db, usd / KB);
prop.put("good" + db, god / KB);
prop.put("best" + db, bst / KB);
prop.put("dflt" + db, Long.parseLong((String) defaultSettings.get("ramCache" + db)) / KB);
prop.put("ramCache" + db, Long.parseLong(env.getConfig("ramCache" + db, "0")) / KB);
usedTotal += usd;
currTotal += Long.parseLong(env.getConfig("ramCache" + db, "0"));
dfltTotal += Long.parseLong((String) defaultSettings.get("ramCache" + db));
goodTotal += god;
bestTotal += bst;
}
}
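
The bst value in putprop above rounds the product chunk * req to the next whole KiB: shifting right by 10 truncates the sub-KiB remainder, and adding 1 before shifting back left lands on the next KiB boundary. A small standalone check of that arithmetic (hypothetical class name, same shift trick as the diff):

public class KiBRounding {

    // same arithmetic as the bst line in putprop: truncate to whole KiB,
    // then add one KiB, so the suggested cache size is always a KiB multiple
    static long roundToNextKiB(long bytes) {
        return ((bytes >> 10) + 1) << 10;
    }

    public static void main(String[] args) {
        System.out.println(roundToNextKiB(2800)); // 3072 (3 KiB)
        System.out.println(roundToNextKiB(1024)); // 2048 (exact KiB values still gain one KiB)
    }
}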

@@ -97,7 +97,7 @@ public class blogBoard {
return datbase.size();
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return datbase.cacheNodeChunkSize();
}

@@ -86,7 +86,7 @@ public class messageBoard {
return database.size();
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return database.cacheNodeChunkSize();
}

@@ -92,7 +92,7 @@ public final class userDB {
}
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return userTable.cacheNodeChunkSize();
}

@@ -100,14 +100,10 @@ public class wikiBoard {
return datbase.size();
}
public int[] dbCacheNodeChunkSize() {
int[] db = datbase.cacheNodeChunkSize();
int[] bk = bkpbase.cacheNodeChunkSize();
int[] i = new int[3];
i[kelondroRecords.CP_LOW] = (db[kelondroRecords.CP_LOW] + bk[kelondroRecords.CP_LOW]) / 2;
i[kelondroRecords.CP_MEDIUM] = (db[kelondroRecords.CP_MEDIUM] + bk[kelondroRecords.CP_MEDIUM]) / 2;
i[kelondroRecords.CP_HIGH] = (db[kelondroRecords.CP_HIGH] + bk[kelondroRecords.CP_HIGH]) / 2;
return i;
public int dbCacheNodeChunkSize() {
int db = datbase.cacheNodeChunkSize();
int bk = bkpbase.cacheNodeChunkSize();
return (db + bk) / 2;
}
public int[] dbCacheNodeStatus() {

@@ -430,7 +430,7 @@ public class indexURL {
if (urlHashCache != null) urlHashCache.close();
}
public int[] cacheNodeChunkSize() {
public int cacheNodeChunkSize() {
return urlHashCache.cacheNodeChunkSize();
}

@@ -24,7 +24,7 @@
package de.anomic.kelondro;
public class kelondroIntBytesMap extends kelondroRowSet {
public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public kelondroIntBytesMap(int payloadSize, int initSize) {
super(new kelondroRow(new int[]{4, payloadSize}), initSize);

@@ -133,7 +133,7 @@ public class kelondroMap {
return dyn.row().width(0);
}
public int[] cacheNodeChunkSize() {
public int cacheNodeChunkSize() {
return dyn.cacheNodeChunkSize();
}

@@ -71,7 +71,6 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Iterator;
@@ -86,7 +85,6 @@ public class kelondroRecords {
// memory calculation
private static final int element_in_cache = 4; // for kelondroCollectionObjectMap: 4; for HashMap: 52
private static final int cache_control_entry = 96;
// caching flags
public static final int CP_NONE = -1; // cache priority none; entry shall not be cached
@@ -141,10 +139,8 @@ public class kelondroRecords {
private int TXTPROPW; // size of a single TXTPROPS element
// caching buffer
private kelondroIntBytesMap[] cacheHeaders; // the cache; holds overhead values and key element
private kelondroIntBytesMap cacheHeaders; // the cache; holds overhead values and key element
private int cacheSize; // number of cache records
private long cacheStartup; // startup time; for cache aging
private kelondroMScoreCluster cacheScore; // controls cache aging
private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush;
// optional logger
@@ -417,25 +413,11 @@ public class kelondroRecords {
if (buffersize <= 0) {
this.cacheSize = 0;
this.cacheHeaders = null;
this.cacheScore = null;
} else {
if ((buffersize / cacheNodeChunkSize(false)) > size()) {
this.cacheSize = (int) (buffersize / cacheNodeChunkSize(false));
this.cacheScore = null; // no cache control because we have more cache slots than database entries
} else {
this.cacheSize = (int) (buffersize / cacheNodeChunkSize(true));
this.cacheScore = new kelondroMScoreCluster(); // cache control of CP_HIGH caches
}
this.cacheHeaders = new kelondroIntBytesMap[]{
new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4),
new kelondroIntBytesMap(this.headchunksize, 0),
new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4)
};
this.cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
this.cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
this.cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
this.cacheSize = (int) (buffersize / cacheNodeChunkSize());
this.cacheHeaders = new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 4);
this.cacheHeaders.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
}
this.cacheStartup = System.currentTimeMillis();
this.readHit = 0;
this.readMiss = 0;
this.writeUnique = 0;
@@ -457,29 +439,17 @@ public class kelondroRecords {
return new File(filename);
}
protected final int cacheNodeChunkSize(boolean cacheControl) {
return this.headchunksize + element_in_cache + ((cacheControl) ? cache_control_entry : 0);
}
public int[] cacheNodeChunkSize() {
// returns three integers:
// #0: chunk size of CP_LOW - priority entries
// #1: chunk size of CP_MEDIUM - priority entries
// #2: chunk size of CP_HIGH - priority entries
int[] i = new int[3];
i[CP_LOW] = cacheNodeChunkSize(false);
i[CP_MEDIUM] = cacheNodeChunkSize(false);
i[CP_HIGH] = cacheNodeChunkSize(this.cacheScore != null);
return i;
public final int cacheNodeChunkSize() {
return this.headchunksize + element_in_cache;
}
public int[] cacheNodeStatus() {
if (cacheHeaders == null) return new int[]{0,0,0,0,0,0,0,0,0,0};
return new int[]{
cacheSize,
cacheHeaders[CP_HIGH].size(),
cacheHeaders[CP_MEDIUM].size(),
cacheHeaders[CP_LOW].size(),
cacheHeaders.size(),
0, // not used
0, // not used
readHit,
readMiss,
writeUnique,
@@ -492,9 +462,7 @@ public class kelondroRecords {
public String cacheNodeStatusString() {
return
"cacheMaxSize=" + cacheSize +
", CP_HIGH=" + cacheHeaders[CP_HIGH].size() +
", CP_MEDIUM=" + cacheHeaders[CP_MEDIUM].size() +
", CP_LOW=" + cacheHeaders[CP_LOW].size() +
", cacheCurrSize=" + cacheHeaders.size() +
", readHit=" + readHit +
", readMiss=" + readMiss +
", writeUnique=" + writeUnique +
@@ -531,20 +499,7 @@ public class kelondroRecords {
protected void deleteNode(Handle handle) throws IOException {
if (cacheSize != 0) {
synchronized (cacheHeaders) {
if (cacheScore == null) {
cacheHeaders[CP_LOW].removeb(handle.index);
cacheHeaders[CP_MEDIUM].removeb(handle.index);
cacheHeaders[CP_HIGH].removeb(handle.index);
} else if (cacheHeaders[CP_HIGH].getb(handle.index) != null) {
// remove handle from cache-control
cacheScore.deleteScore(handle);
cacheHeaders[CP_HIGH].removeb(handle.index);
} else {
// no cache control for medium-priority entries and
// no cache control for low-priority entries
cacheHeaders[CP_MEDIUM].removeb(handle.index);
cacheHeaders[CP_LOW].removeb(handle.index);
}
cacheHeaders.removeb(handle.index);
cacheDelete++;
}
}
@@ -657,15 +612,7 @@ public class kelondroRecords {
} else synchronized(cacheHeaders) {
byte[] cacheEntry = null;
int cp = CP_HIGH;
cacheEntry = cacheHeaders[CP_HIGH].getb(this.handle.index); // first try
if (cacheEntry == null) {
cacheEntry = cacheHeaders[CP_MEDIUM].getb(this.handle.index); // second try
cp = CP_MEDIUM;
}
if (cacheEntry == null) {
cacheEntry = cacheHeaders[CP_LOW].getb(this.handle.index); // third try
cp = CP_LOW;
}
cacheEntry = cacheHeaders.getb(this.handle.index);
if (cacheEntry == null) {
// cache miss, we read overhead and key from file
readMiss++;
@ -694,10 +641,6 @@ public class kelondroRecords {
//this.headChunk = new byte[headchunksize];
//System.arraycopy(cacheEntry, 0, this.headChunk, 0, headchunksize);
this.headChunk = cacheEntry;
// update cache scores to announce this cache hit
if ((cacheScore != null) && (cp == CP_HIGH)) {
cacheScore.setScore(this.handle, (int) ((System.currentTimeMillis() - cacheStartup) / 1000));
}
this.headChanged = false;
}
}
@@ -855,90 +798,27 @@ public class kelondroRecords {
}
private void update2Cache(int forPriority) {
if (cacheSize > 0) {
cacheHeaders[CP_LOW].removeb(this.handle.index);
cacheHeaders[CP_MEDIUM].removeb(this.handle.index);
cacheHeaders[CP_HIGH].removeb(this.handle.index);
}
if (cacheSpace(forPriority)) updateNodeCache(forPriority);
if (cacheSpace()) updateNodeCache(forPriority);
}
private boolean cacheSpace(int forPriority) {
private boolean cacheSpace() {
// check for space in cache
// should be only called within a synchronized(XcacheHeaders) environment
// returns true if it is allowed to add another entry to the cache
// returns false if the cache is considered to be full
if (forPriority == CP_NONE) return false;
if (cacheSize == 0) return false; // no caching
long cs = cacheHeaders[CP_LOW].size() + cacheHeaders[CP_MEDIUM].size() + cacheHeaders[CP_HIGH].size();
if (cs == 0) return true; // nothing there to flush
if ((cs < cacheSize) && (availableMemory() >= memBlock)) return true; // no need to flush cache space
if (cacheHeaders.size() == 0) return true; // nothing there to flush
if ((cacheHeaders.size() < cacheSize) && (availableMemory() >= memBlock)) return true; // no need to flush cache space
// delete one entry. distinguish between different priority cases:
if (forPriority == CP_LOW) {
// remove only from low-priority cache
if (cacheHeaders[CP_LOW].size() != 0) {
// just delete any of the low-priority entries
cacheHeaders[CP_LOW].removeOne();
cacheFlush++;
return true;
} else {
// we cannot delete any entry, therefore there is no space for another entry
return false;
}
} else if (forPriority == CP_MEDIUM) {
if (cacheHeaders[CP_LOW].size() != 0) {
// just delete any of the low-priority entries
cacheHeaders[CP_LOW].removeOne();
if (cacheHeaders.size() != 0) {
// just delete any of the entries
cacheHeaders.removeOne();
cacheFlush++;
return true;
} else if (cacheHeaders[CP_MEDIUM].size() != 0) {
// just delete any of the medium-priority entries
cacheHeaders[CP_MEDIUM].removeOne();
cacheFlush++;
return true;
} else {
// we cannot delete any entry, therefore there is no space for another entry
return false;
}
} else {
// request for a high-priority entry
if (cacheHeaders[CP_LOW].size() != 0) {
// just delete any of the low-priority entries
cacheHeaders[CP_LOW].removeOne();
cacheFlush++;
return true;
} else if (cacheHeaders[CP_MEDIUM].size() != 0) {
// just delete any of the medium-priority entries
cacheHeaders[CP_MEDIUM].removeOne();
cacheFlush++;
return true;
} else if (cacheScore == null) {
// no cache-control of high-priority cache
// the cache is considered as full
// we cannot delete any entry, therefore there is no space for another entry
return false;
} else try {
// delete one from the high-priority entries
// use the cache-control to find the right object
Handle delkey = (Handle) cacheScore.getMinObject();
cacheScore.deleteScore(delkey);
cacheHeaders[CP_HIGH].removeb(delkey.index);
cacheFlush++;
return true;
} catch (NoSuchElementException e) {
// this is a strange error and could be caused by internal java problems
// we simply clear the cache
String error = "cachScore error: " + e.getMessage() + "; cachesize=" + cacheSize + ", cache.size()=[" + cacheHeaders[0].size() + "," + cacheHeaders[1].size() + "," + cacheHeaders[2].size() + "], cacheScore.size()=" + cacheScore.size();
cacheScore = new kelondroMScoreCluster();
cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, cacheSize / 4);
cacheHeaders[CP_MEDIUM] = new kelondroIntBytesMap(headchunksize, 0);
cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, cacheSize / 4);
cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
throw new kelondroException(filename, error);
}
}
}
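
Reassembled from the added lines of the hunk above (a reading aid only, since the page interleaves old and new lines), the whole space check now reduces to:

private boolean cacheSpace() {
    // returns true if it is allowed to add another entry to the cache,
    // false if the cache is considered to be full
    if (cacheSize == 0) return false; // no caching
    if (cacheHeaders.size() == 0) return true; // nothing there to flush
    if ((cacheHeaders.size() < cacheSize) && (availableMemory() >= memBlock)) return true;
    if (cacheHeaders.size() != 0) {
        // just delete any of the entries
        cacheHeaders.removeOne();
        cacheFlush++;
        return true;
    }
    return false;
}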
@@ -947,8 +827,7 @@ public class kelondroRecords {
if (this.headChunk == null) return; // nothing there to cache
if (priority == CP_NONE) return; // it is not wanted that this shall be cached
if (cacheSize == 0) return; // we do not use the cache
int cs = cacheHeaders[CP_LOW].size() + cacheHeaders[CP_MEDIUM].size() + cacheHeaders[CP_HIGH].size();
if (cs >= cacheSize) return; // no cache update if cache is full
if (cacheHeaders.size() >= cacheSize) return; // no cache update if cache is full
synchronized (cacheHeaders) {
// generate cache entry
@@ -958,13 +837,7 @@ public class kelondroRecords {
// store the cache entry
boolean upd = false;
if (priority != CP_LOW) upd = upd || (cacheHeaders[CP_LOW].removeb(cacheHandle.index) != null);
if (priority != CP_MEDIUM) upd = upd || (cacheHeaders[CP_MEDIUM].removeb(cacheHandle.index) != null);
if (priority != CP_HIGH) upd = upd || (cacheHeaders[CP_HIGH].removeb(cacheHandle.index) != null);
cacheHeaders[priority].putb(cacheHandle.index, headChunk);
if ((cacheScore != null) && (priority == CP_HIGH)) {
cacheScore.setScore(cacheHandle, (int) ((System.currentTimeMillis() - cacheStartup) / 1000));
}
cacheHeaders.putb(cacheHandle.index, headChunk);
if (upd) writeDouble++; else writeUnique++;
// delete the cache entry buffer
@@ -991,15 +864,15 @@ public class kelondroRecords {
System.out.println();
}
} else {
System.out.println("### cache report: [" + cacheHeaders[0].size() + "," + cacheHeaders[0].size() + "," + cacheHeaders[0].size() + "] entries");
for (int cp = 0; cp < 3; cp++) {
Iterator i = cacheHeaders[cp].elements();
System.out.println("### cache report: " + cacheHeaders.size() + " entries");
Iterator i = cacheHeaders.elements();
byte[] entry;
while (i.hasNext()) {
entry = (byte[]) i.next();
// print from cache
System.out.print("#C " + cp + " ");
System.out.print("#C ");
printChunk((byte[]) entry);
System.out.println();
@@ -1013,7 +886,6 @@ public class kelondroRecords {
*/
System.out.println();
}
}
}
System.out.println("### end report");
}
@@ -1374,11 +1246,7 @@ public class kelondroRecords {
public kelondroProfile[] profiles() {
return new kelondroProfile[]{
(cacheHeaders == null) ? new kelondroProfile() :
kelondroProfile.consolidate(new kelondroProfile[]{
cacheHeaders[0].profile(),
cacheHeaders[1].profile(),
cacheHeaders[2].profile()
}),
cacheHeaders.profile(),
entryFile.profile()
};
}

@@ -99,7 +99,7 @@ public final class kelondroStack extends kelondroRecords {
public static kelondroStack reset(kelondroStack stack) {
// memorize settings to this file
File f = new File(stack.filename);
long bz = stack.cacheNodeStatus()[0] * stack.cacheNodeChunkSize(true);
long bz = stack.cacheNodeStatus()[0] * stack.cacheNodeChunkSize();
kelondroRow row = stack.row();
// close and delete the file

@@ -576,7 +576,7 @@ public final class plasmaCrawlLURL extends indexURL {
kelondroBase64Order.enhancedCoder.encodeLong(wordCount, urlWordCountLength).getBytes(),
};
urlHashCache.put(urlHashCache.row().newEntry(entry));
serverLog.logFine("PLASMA","STORED new LURL " + url.toString());
//serverLog.logFine("PLASMA","STORED new LURL " + url.toString());
this.stored = true;
} catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e);

@@ -78,7 +78,7 @@ public class plasmaCrawlProfile {
domsCache = new HashMap();
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return profileTable.cacheNodeChunkSize();
}

@@ -85,7 +85,7 @@ public class plasmaCrawlRobotsTxt {
}
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return robotsTable.cacheNodeChunkSize();
}

@@ -203,7 +203,7 @@ public final class plasmaHTCache {
return this.responseHeaderDB.size();
}
public int[] dbCacheChunkSize() {
public int dbCacheChunkSize() {
return this.responseHeaderDB.cacheNodeChunkSize();
}

@@ -808,7 +808,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// work off unwritten files
if (entry.cacheArray == null) {
this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile);
//this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile);
} else {
String error = entry.shallStoreCacheForProxy();
if (error == null) {

@@ -133,7 +133,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return assortmentCluster.sizes();
}
public int[] assortmentsCacheChunkSizeAvg() {
public int assortmentsCacheChunkSizeAvg() {
return assortmentCluster.cacheChunkSizeAvg();
}

@@ -264,7 +264,7 @@ public final class plasmaWordIndexAssortment {
return assortments.size();
}
public int[] cacheNodeChunkSize() {
public int cacheNodeChunkSize() {
return assortments.cacheNodeChunkSize();
}

@@ -290,19 +290,14 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
return sizes;
}
public int[] cacheChunkSizeAvg() {
int[] i = new int[]{0, 0, 0};
int[] a = new int[3];
public int cacheChunkSizeAvg() {
int i = 0;
int a;
for (int j = 0; j < clusterCount; j++) {
a = assortments[j].cacheNodeChunkSize();
i[kelondroRecords.CP_LOW] += a[kelondroRecords.CP_LOW];
i[kelondroRecords.CP_MEDIUM] += a[kelondroRecords.CP_MEDIUM];
i[kelondroRecords.CP_HIGH] += a[kelondroRecords.CP_HIGH];
i += a;
}
a[kelondroRecords.CP_LOW] = i[kelondroRecords.CP_LOW] / clusterCount;
a[kelondroRecords.CP_MEDIUM] = i[kelondroRecords.CP_MEDIUM] / clusterCount;
a[kelondroRecords.CP_HIGH] = i[kelondroRecords.CP_HIGH] / clusterCount;
return a;
return i / clusterCount;
}
public int[] cacheNodeStatus() {

@@ -100,7 +100,7 @@ public class yacyNewsDB {
news = createDB(path, bufferkb);
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return news.cacheNodeChunkSize();
}

@@ -106,7 +106,7 @@ public class yacyNewsPool {
return newsDB.size();
}
public int[] dbCacheNodeChunkSize() {
public int dbCacheNodeChunkSize() {
return newsDB.dbCacheNodeChunkSize();
}

@@ -170,15 +170,12 @@ public final class yacySeedDB {
} catch (IOException e) {}
}
public int[] dbCacheNodeChunkSize() {
int[] ac = seedActiveDB.cacheNodeChunkSize();
int[] pa = seedPassiveDB.cacheNodeChunkSize();
int[] po = seedPotentialDB.cacheNodeChunkSize();
int[] i = new int[3];
i[kelondroRecords.CP_LOW] = (ac[kelondroRecords.CP_LOW] + pa[kelondroRecords.CP_LOW] + po[kelondroRecords.CP_LOW]) / 3;
i[kelondroRecords.CP_MEDIUM] = (ac[kelondroRecords.CP_MEDIUM] + pa[kelondroRecords.CP_MEDIUM] + po[kelondroRecords.CP_MEDIUM]) / 3;
i[kelondroRecords.CP_HIGH] = (ac[kelondroRecords.CP_HIGH] + pa[kelondroRecords.CP_HIGH] + po[kelondroRecords.CP_HIGH]) / 3;
return i;
public int dbCacheNodeChunkSize() {
int ac = seedActiveDB.cacheNodeChunkSize();
int pa = seedPassiveDB.cacheNodeChunkSize();
int po = seedPotentialDB.cacheNodeChunkSize();
return (ac+ pa + po) / 3;
}
public int[] dbCacheNodeStatus() {
