From 9dd790087d2ab75cb4b29a99e1db0652eaaf8f17 Mon Sep 17 00:00:00 2001 From: luccioman Date: Thu, 15 Jun 2017 09:50:02 +0200 Subject: [PATCH] Added HT Cache basic statistics (hit rate) --- htroot/ConfigHTCache_p.html | 2 + htroot/ConfigHTCache_p.java | 8 +++ source/net/yacy/crawler/data/Cache.java | 63 +++++++++++++++++-- .../java/net/yacy/crawler/data/CacheTest.java | 33 ++-------- 4 files changed, 72 insertions(+), 34 deletions(-) diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html index 48bc2eb9a..cf4b3b903 100644 --- a/htroot/ConfigHTCache_p.html +++ b/htroot/ConfigHTCache_p.html @@ -16,6 +16,8 @@
HTCache Configuration
+
Cache hits
+
#[hitRate]# ( #[hits]# hits for a total of #[requests]# requests )
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java index 88baf9c95..827dab17a 100644 --- a/htroot/ConfigHTCache_p.java +++ b/htroot/ConfigHTCache_p.java @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; +import java.text.NumberFormat; import java.util.zip.Deflater; import net.yacy.cora.protocol.RequestHeader; @@ -117,6 +118,13 @@ public class ConfigHTCache_p { prop.put("actualCacheDocCount", Cache.getActualCacheDocCount()); prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64)); + /* Statistics */ + final long hits = Cache.getHits(); + final long totalRequests = Cache.getTotalRequests(); + prop.put("hits", hits); + prop.put("requests", totalRequests); + prop.put("hitRate", NumberFormat.getPercentInstance().format(Cache.getHitRate())); + // return rewrite properties return prop; } diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java index bc5189e4f..5c95175ba 100644 --- a/source/net/yacy/crawler/data/Cache.java +++ b/source/net/yacy/crawler/data/Cache.java @@ -40,6 +40,7 @@ import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.atomic.AtomicLong; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; @@ -74,7 +75,14 @@ public final class Cache { private static Compressor fileDB = null; private static ArrayStack fileDBunbuffered = null; - private static long maxCacheSize = Long.MAX_VALUE; + private static volatile long maxCacheSize = Long.MAX_VALUE; + + /** Total number of requests for cached response since last start/initialization or cache clear */ + private static AtomicLong totalRequests = new AtomicLong(0); + + /** Total number of cache hits since last start/initialization or cache clear */ + private static AtomicLong hits = new AtomicLong(0); + private static File cachePath = null; private static String prefix; public static final ConcurrentLog log = new ConcurrentLog("HTCACHE"); @@ -91,6 +99,8 @@ public final class Cache { cachePath = htCachePath; maxCacheSize = cacheSizeMax; prefix = peerSalt; + totalRequests.set(0); + hits.set(0); // set/make cache path if (!htCachePath.exists()) { @@ -201,6 +211,9 @@ public final class Cache { } catch (final IOException e) { ConcurrentLog.logException(e); } + /* Clear statistics */ + totalRequests.set(0); + hits.set(0); } /** @@ -287,13 +300,17 @@ public final class Cache { * @return true if the content of the url is in the cache, false otherwise */ public static boolean has(final byte[] urlhash) { + totalRequests.incrementAndGet(); boolean headerExists; boolean fileExists; //synchronized (responseHeaderDB) { headerExists = responseHeaderDB.containsKey(urlhash); fileExists = fileDB.containsKey(urlhash); //} - if (headerExists && fileExists) return true; + if (headerExists && fileExists) { + hits.incrementAndGet(); + return true; + } if (!headerExists && !fileExists) return false; // if not both is there then we do a clean-up if (headerExists) try { @@ -317,7 +334,7 @@ public final class Cache { * info object couldn't be created */ public static ResponseHeader getResponseHeader(final byte[] hash) { - + totalRequests.incrementAndGet(); // loading data from database Map hdb = null; try { @@ -327,8 +344,11 @@ public final class Cache { } catch (final SpaceExceededException e) { return null; } - if (hdb == null) return null; + if (hdb == null) { + return null; + } + hits.incrementAndGet(); return new ResponseHeader(hdb); } @@ -341,10 +361,14 @@ public final class Cache { * is returned. */ public static byte[] getContent(final byte[] hash) { + totalRequests.incrementAndGet(); // load the url as resource from the cache try { final byte[] b = fileDB.get(hash); - if (b == null) return null; + if (b == null) { + return null; + } + hits.incrementAndGet(); return b; } catch (final UnsupportedEncodingException e) { ConcurrentLog.logException(e); @@ -362,9 +386,14 @@ public final class Cache { } public static boolean hasContent(final byte[] hash) { + totalRequests.incrementAndGet(); // load the url as resource from the cache try { - return fileDB.containsKey(hash); + boolean result = fileDB.containsKey(hash); + if(result) { + hits.incrementAndGet(); + } + return result; } catch (final OutOfMemoryError e) { ConcurrentLog.logException(e); return false; @@ -380,4 +409,26 @@ public final class Cache { responseHeaderDB.delete(hash); fileDB.delete(hash); } + + /** + * @return the total number of requests for cache content since last start/initialization or cache clear + */ + public static long getTotalRequests() { + return totalRequests.get(); + } + + /** + * @return the total number of cache hits (cached response found) since last start/initialization or cache clear + */ + public static long getHits() { + return hits.get(); + } + + /** + * @return the hit rate (proportion of hits over total requests) + */ + public static double getHitRate() { + final long total = totalRequests.get(); + return total > 0 ? ((Cache.getHits() / ((double) total))) : 0.0 ; + } } diff --git a/test/java/net/yacy/crawler/data/CacheTest.java b/test/java/net/yacy/crawler/data/CacheTest.java index 8c8fe5d96..3feb29c23 100644 --- a/test/java/net/yacy/crawler/data/CacheTest.java +++ b/test/java/net/yacy/crawler/data/CacheTest.java @@ -29,6 +29,7 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.nio.charset.StandardCharsets; +import java.text.NumberFormat; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -266,12 +267,6 @@ public class CacheTest { /** Sleep time (in milliseconds) between each cache operations */ private final long sleepTime; - /** Number of Cache.getContent() successes (not null returned) */ - private int getContentSuccesses; - - /** Number of Cache.getContent() misses (null returned) */ - private int getContentMisses; - /** Number of Cache.store() failures */ private int storeFailures; @@ -333,8 +328,6 @@ public class CacheTest { this.storeTime = 0; this.maxStoreTime = 0; - this.getContentMisses = 0; - this.getContentSuccesses = 0; this.storeFailures = 0; this.getContentTime = 0; this.maxGetContentTime = 0; @@ -366,11 +359,7 @@ public class CacheTest { /* Measure content retrieval */ time = System.nanoTime(); - if (Cache.getContent(urlHash) == null) { - this.getContentMisses++; - } else { - this.getContentSuccesses++; - } + Cache.getContent(urlHash); time = (System.nanoTime() - time); this.getContentTime += time; this.maxGetContentTime = Math.max(time, this.maxGetContentTime); @@ -449,14 +438,6 @@ public class CacheTest { return this.maxDeleteTime; } - public int getGetContentMisses() { - return this.getContentMisses; - } - - public int getGetContentSuccesses() { - return this.getContentSuccesses; - } - public int getStoreFailures() { return this.storeFailures; } @@ -543,8 +524,6 @@ public class CacheTest { long maxStoreTime = 0; long getContentTime = 0; long maxGetContentTime = 0; - int getContentMisses = 0; - int getContentSuccesses = 0; int storeFailures = 0; long deleteTime = 0; long maxDeleteTime = 0; @@ -554,8 +533,6 @@ public class CacheTest { maxStoreTime = Math.max(task.getMaxStoreTime(), maxStoreTime); getContentTime += task.getGetContentTime(); maxGetContentTime = Math.max(task.getMaxGetContentTime(), maxGetContentTime); - getContentMisses += task.getGetContentMisses(); - getContentSuccesses += task.getGetContentSuccesses(); storeFailures += task.getStoreFailures(); deleteTime += task.getDeleteTime(); maxDeleteTime = Math.max(task.getMaxDeleteTime(), maxDeleteTime); @@ -573,9 +550,9 @@ public class CacheTest { "Cache.getContent() maximum time (ms) : " + TimeUnit.NANOSECONDS.toMillis(maxGetContentTime)); System.out.println("Cache.getContent() mean time (ms) : " + TimeUnit.NANOSECONDS.toMillis(getContentTime / totalSteps)); - System.out.println("Cache.getContent() : misses " + getContentMisses + " successes : " - + getContentSuccesses + " ( miss rate : " - + ((getContentMisses / ((double) getContentSuccesses + (double) getContentMisses)) * 100) + "% )"); + System.out.println("Cache hits : " + Cache.getHits() + " total requests : " + + Cache.getTotalRequests() + " ( hit rate : " + + NumberFormat.getPercentInstance().format(Cache.getHitRate()) + " )"); System.out.println(""); System.out.println("Cache.delete() total time (ms) : " + TimeUnit.NANOSECONDS.toMillis(deleteTime)); System.out.println("Cache.delete() maximum time (ms) : " + TimeUnit.NANOSECONDS.toMillis(maxDeleteTime));