From 3f1ebc097e26dd8c75fc879807f6cb993b37536c Mon Sep 17 00:00:00 2001 From: hermens Date: Mon, 12 Jun 2006 12:14:11 +0000 Subject: [PATCH] Limit the size of the DNS cache to 5000 and the age of the entries to one day. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2199 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceMemory_p.html | 18 +++++++++ htroot/PerformanceMemory_p.java | 7 ++++ source/de/anomic/http/httpc.java | 65 ++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index 2390c642d..f0ef1111f 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -395,6 +395,24 @@ Increasing this cache may speed up crawling, but not much space is needed, so th

--> + +
Other caching structures:
+ + + + + + + + + + + + + +
TypeAmount
DNSCache#[namecache.hit]#
DNSNoCache#[namecache.noCache]#
+

+ #%env/templates/footer.template%# diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index d4e40123c..722eedafd 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -47,6 +47,7 @@ import java.util.Map; import java.io.File; +import de.anomic.http.httpc; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; @@ -282,6 +283,12 @@ public class PerformanceMemory_p { prop.put("heap" , Integer.toString(c)); */ + // other caching structures + long amount = httpc.nameCacheHitSize(); + prop.put("namecache.hit",Long.toString(amount)); + amount = httpc.nameCacheNoCachingListSize(); + prop.put("namecache.noCache",Long.toString(amount)); + // return rewrite values for templates return prop; } diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index fede8984e..4b6eacb0a 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -48,6 +48,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PushbackInputStream; +import java.lang.Math; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.MalformedURLException; @@ -81,6 +82,7 @@ import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCore; @@ -126,6 +128,10 @@ public final class httpc { // the dns cache private static final Map nameCacheHit = Collections.synchronizedMap(new HashMap()); // a not-synchronized map resulted in deadlocks + private static final kelondroMScoreCluster nameCacheAges = new kelondroMScoreCluster(); + private static final long startTime = System.currentTimeMillis(); + private static final int maxNameCacheAge = 24 * 60 * 60; // 
24 hours in seconds + private static final int maxNameCacheSize = 5000; public static final List nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList()); private static final Set nameCacheNoCachingList = Collections.synchronizedSet(new HashSet()); //private static HashSet nameCacheMiss = new HashSet(); @@ -417,6 +423,9 @@ public final class httpc { if ((host == null)||(host.length() == 0)) return null; host = host.toLowerCase().trim(); + // flushing old entries before accessing + flushNameCacheHit(); + // trying to resolve host by doing a name cache lookup InetAddress ip = (InetAddress) nameCacheHit.get(host); if (ip != null) return ip; @@ -444,7 +453,12 @@ public final class httpc { } } - if (doCaching) nameCacheHit.put(ip.getHostName(), ip); + if (doCaching) { + synchronized (nameCacheHit) { + nameCacheHit.put(ip.getHostName(), ip); + nameCacheAges.setScore(ip.getHostName(), intTime(System.currentTimeMillis())); + } + } return ip; } catch (UnknownHostException e) { //nameCacheMiss.add(host); @@ -474,6 +488,53 @@ public final class httpc { // } // } + /** + * Returns the number of entries in the nameCacheHit map + * + * @return int The number of entries in the nameCacheHit map + */ + public static int nameCacheHitSize() { + return nameCacheHit.size(); + } + + /** + * Returns the number of entries in the nameCacheNoCachingList list + * + * @return int The number of entries in the nameCacheNoCachingList list + */ + public static int nameCacheNoCachingListSize() { + return nameCacheNoCachingList.size(); + } + + /** + * Converts the time to a non negative int + * + * @param longTime Time in milliseconds since 01/01/1970 00:00 GMT + * @return int seconds since startTime + */ + private static int intTime(long longTime) { + return (int) Math.max(0, ((longTime - startTime) / 1000)); + } + + /** + * Removes old entries from the dns cache + */ + public static void flushNameCacheHit() { + int cutofftime = intTime(System.currentTimeMillis()) -
maxNameCacheAge; + int size; + String k; + synchronized (nameCacheHit) { + size = nameCacheAges.size(); + while ((size > 0) && + ((size > maxNameCacheSize) || (nameCacheAges.getMinScore() < cutofftime))) { // size > 0 must guard both the size limit and the age limit + k = (String) nameCacheAges.getMinObject(); + nameCacheHit.remove(k); + nameCacheAges.deleteScore(k); + size--; // size = nameCacheAges.size(); + } + } + } + /** * Returns the given date in an HTTP-usable format. * @@ -494,8 +555,6 @@ public final class httpc { return new GregorianCalendar(GMTTimeZone).getTime(); } - - /** * Initialize the httpc-instance with the given data. This method is used, * if you have to use a proxy to access the pages. This just calls init