From cda087f43b17d839caa1a22fee1ae89c9cd641b5 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 23 May 2006 16:43:28 +0000 Subject: [PATCH] - integrated cache miss storage into object cache - removed cache-miss handling from indexURL todo: new Monitoring in PerformanceMemory_p git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2132 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceMemory_p.java | 6 +- source/de/anomic/index/indexURL.java | 46 +-------- .../anomic/kelondro/kelondroObjectCache.java | 99 +++++++++++++++---- source/de/anomic/kelondro/kelondroTree.java | 2 + source/de/anomic/plasma/plasmaCrawlEURL.java | 24 +++-- source/de/anomic/plasma/plasmaCrawlLURL.java | 38 +++++-- source/de/anomic/plasma/plasmaCrawlNURL.java | 25 +---- .../de/anomic/plasma/plasmaSwitchboard.java | 13 --- source/de/anomic/server/serverCore.java | 3 - 9 files changed, 132 insertions(+), 124 deletions(-) diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index f1dd395ee..3107231dd 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -285,13 +285,13 @@ public class PerformanceMemory_p { */ // other caching structures - long amount = sb.urlPool.errorURL.existsIndexSize(); + long amount = 0; prop.put("eurl.existsIndexAmount",Long.toString(amount)); prop.put("eurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); - amount = sb.urlPool.noticeURL.existsIndexSize(); + amount = 0; prop.put("nurl.existsIndexAmount",Long.toString(amount)); prop.put("nurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); - amount = sb.urlPool.loadedURL.existsIndexSize(); + amount = 0; prop.put("lurl.existsIndexAmount",Long.toString(amount)); prop.put("lurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); diff --git a/source/de/anomic/index/indexURL.java b/source/de/anomic/index/indexURL.java index 322f498f3..36f6658ff 100644 --- a/source/de/anomic/index/indexURL.java +++ b/source/de/anomic/index/indexURL.java @@ -1,7 +1,6 @@ package de.anomic.index; import java.io.IOException; -import java.lang.Boolean; import java.net.URL; import java.net.MalformedURLException; import java.text.SimpleDateFormat; @@ -383,11 +382,9 @@ public class indexURL { // the class object protected kelondroTree urlHashCache; - protected final HashMap existsIndex; // allow subclasses to access the existsIndex during Entry.store() - + public indexURL() { urlHashCache = null; - existsIndex = new HashMap(); } public int size() { @@ -410,47 +407,6 @@ public class indexURL { return urlHashCache.cacheObjectStatus(); } - public boolean exists(String urlHash) { - synchronized (existsIndex) { - Boolean existsInIndex = (Boolean) existsIndex.get(urlHash); - if (existsInIndex != null) return existsInIndex.booleanValue(); - try { - if (urlHashCache.get(urlHash.getBytes()) != null) { - existsIndex.put(urlHash, Boolean.TRUE); - return true; - } else { - existsIndex.put(urlHash, Boolean.FALSE); - return false; - } - } catch (IOException e) { - return false; - } - } - } - - public long existsIndexSize() { - return this.existsIndex.size(); - } - - public boolean remove(String urlHash) { - synchronized (existsIndex) { - try { - Boolean existsInIndex = (Boolean) existsIndex.remove(urlHash); - if (existsInIndex == null) existsInIndex = Boolean.FALSE; - boolean existsInCache = (this.urlHashCache.remove(urlHash.getBytes()) != null); - existsIndex.put(urlHash, Boolean.FALSE); - return existsInIndex.booleanValue() || existsInCache; - } catch (IOException e) { - return false; - } - } - } - - public void clearExistsIndex() { - synchronized (existsIndex) { - existsIndex.clear(); - } - } public static final int flagTypeID(String hash) { return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; diff --git a/source/de/anomic/kelondro/kelondroObjectCache.java b/source/de/anomic/kelondro/kelondroObjectCache.java index c1e4e4236..ee036e8bd 100644 --- a/source/de/anomic/kelondro/kelondroObjectCache.java +++ b/source/de/anomic/kelondro/kelondroObjectCache.java @@ -61,18 +61,20 @@ import java.util.TreeMap; public class kelondroObjectCache { private final TreeMap cache; - private final kelondroMScoreCluster ages; + private final kelondroMScoreCluster ages, hasnot; private long startTime; private int maxSize; private long maxAge; private long minMem; private int readHit, readMiss, writeUnique, writeDouble; + private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble; private String name; public kelondroObjectCache(String name, int maxSize, long maxAge, long minMem) { this.name = name; this.cache = new TreeMap(); this.ages = new kelondroMScoreCluster(); + this.hasnot = new kelondroMScoreCluster(); this.startTime = System.currentTimeMillis(); this.maxSize = Math.max(maxSize, 1); this.maxAge = Math.max(maxAge, 10000); @@ -81,6 +83,10 @@ public class kelondroObjectCache { this.readMiss = 0; this.writeUnique = 0; this.writeDouble = 0; + this.hasnotHit = 0; + this.hasnotMiss = 0; + this.hasnotUnique = 0; + this.hasnotDouble = 0; } public String getName() { @@ -170,27 +176,77 @@ public class kelondroObjectCache { synchronized(cache) { prev = cache.put(key, value); ages.setScore(key, intTime(System.currentTimeMillis())); + hasnot.deleteScore(key); } if (prev == null) this.writeUnique++; else this.writeDouble++; - flush(); + flushc(); } public Object get(byte[] key) { if (key == null) return null; Object r = cache.get(new String(key)); - flush(); - if (r == null) this.readMiss++; else this.readHit++; + flushc(); + if (r == null) { + this.readMiss++; + } else { + hasnot.deleteScore(key); + this.readHit++; + } return r; } public Object get(String key) { if (key == null) return null; Object r = cache.get(key); - flush(); - if (r == null) this.readMiss++; else this.readHit++; + flushc(); + if (r == null) { + this.readMiss++; + } else { + hasnot.deleteScore(key); + this.readHit++; + } return r; } + public void hasnot(byte[] key) { + hasnot(new String(key)); + } + + public void hasnot(String key) { + if (key == null) return; + int prev = 0; + synchronized(cache) { + cache.remove(key); + ages.deleteScore(key); + prev = hasnot.getScore(key); + hasnot.setScore(key, intTime(System.currentTimeMillis())); + } + if (prev == 0) this.hasnotUnique++; else this.hasnotDouble++; + flushh(); + } + + public int has(byte[] key) { + return has(new String(key)); + } + + public int has(String key) { + // returns a 3-value boolean: + // 1 = key definitely exists + // -1 = key definitely does not exist + // 0 = unknown, if key exists + if (key == null) return 0; + synchronized(cache) { + if (hasnot.getScore(key) > 0) { + this.hasnotHit++; + return -1; + } + this.hasnotMiss++; + if (cache.get(key) != null) return 1; + } + flushh(); + return 0; + } + public void remove(byte[] key) { remove(new String(key)); } @@ -200,34 +256,37 @@ public class kelondroObjectCache { synchronized(cache) { cache.remove(key); ages.deleteScore(key); + hasnot.setScore(key, intTime(System.currentTimeMillis())); } - flush(); } - public void flush() { + public void flushc() { String k; synchronized(cache) { while ((ages.size() > 0) && - ((k = bestFlush()) != null) && - ((size() > maxSize) || + ((k = (String) ages.getMinObject()) != null) && + ((ages.size() > maxSize) || ((System.currentTimeMillis() - longEmit(ages.getScore(k))) > maxAge) || (Runtime.getRuntime().freeMemory() < minMem)) ) { cache.remove(k); ages.deleteScore(k); - //if (Runtime.getRuntime().freeMemory() < minMem) System.gc(); // prevent unnecessary loops } } } - public String bestFlush() { - if (cache.size() == 0) return null; - try { - synchronized (cache) { - return (String) ages.getMinObject(); // flush oldest entries - } - } catch (Exception e) {} - return null; + public void flushh() { + String k; + synchronized(cache) { + while ((hasnot.size() > 0) && + ((k = (String) hasnot.getMinObject()) != null) && + ((hasnot.size() > maxSize) || + ((System.currentTimeMillis() - longEmit(hasnot.getScore(k))) > maxAge) || + (Runtime.getRuntime().freeMemory() < minMem)) + ) { + hasnot.deleteScore(k); + + } + } } - } diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 6a6af60e1..5764fb8d6 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -228,6 +228,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { //System.out.println("cache hit in objectCache, db:" + super.filename); return result; } + if ((objectCache != null) && (objectCache.has(key) == -1)) return null; synchronized (writeSearchObj) { writeSearchObj.process(key); if (writeSearchObj.found()) { @@ -235,6 +236,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { if (objectCache != null) objectCache.put(key, result); } else { result = null; + if (objectCache != null) objectCache.hasnot(key); } } return result; diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index 63b0fa67f..dce1821e8 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -45,7 +45,6 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; -import java.lang.Boolean; import java.net.URL; import java.util.Date; import java.util.Enumeration; @@ -118,6 +117,22 @@ public class plasmaCrawlEURL extends indexURL { return new Entry(hash); } + public boolean exists(String urlHash) { + try { + return (urlHashCache.get(urlHash.getBytes()) != null); + } catch (IOException e) { + return false; + } + } + + public boolean remove(String urlHash) { + try { + return (this.urlHashCache.remove(urlHash.getBytes()) != null); + } catch (IOException e) { + return false; + } + } + public void clearStack() { rejectedStack.clear(); } @@ -207,11 +222,8 @@ public class plasmaCrawlEURL extends indexURL { kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, urlRetryLength).getBytes(), this.failreason.getBytes(), this.flags.getBytes() - }; - synchronized(existsIndex) { - urlHashCache.put(entry); - existsIndex.put(this.hash, Boolean.TRUE); - } + }; + urlHashCache.put(entry); } catch (IOException e) { System.out.println("INTERNAL ERROR AT plasmaEURL:url2hash:" + e.toString()); } diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index de4ba9f6e..e2e316701 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -54,7 +54,6 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; -import java.lang.Boolean; import java.net.MalformedURLException; import java.net.URL; import java.text.SimpleDateFormat; @@ -270,19 +269,41 @@ public final class plasmaCrawlLURL extends indexURL { } public boolean remove(String urlHash) { - boolean exists1 = super.remove(urlHash); for (int stack = 1; stack <= 6; stack++) { for (int i = getStackSize(stack) - 1; i >= 0; i--) { if (getUrlHash(stack,i).equals(urlHash)) { - boolean exits2 = removeStack(stack,i); - exists1 = exists1 || exits2; - return exists1; + return removeStack(stack,i); } } } - return exists1; + return false; } + + public boolean exists(String urlHash) { + try { + if (urlHashCache.get(urlHash.getBytes()) != null) { + return true; + } else { + return false; + } + } catch (IOException e) { + return false; + } + } + + /* + public long existsIndexSize() { + return this.existsIndex.size(); + } + + public void clearExistsIndex() { + synchronized (existsIndex) { + existsIndex.clear(); + } + } + */ + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); private static String daydate(Date date) { if (date == null) { @@ -498,11 +519,10 @@ public final class plasmaCrawlLURL extends indexURL { public void store() { // Check if there is a more recent Entry already in the DB if (this.stored) return; - synchronized(existsIndex) { Entry oldEntry; try { if (exists(urlHash)) { - oldEntry = new Entry (urlHash, null); + oldEntry = new Entry(urlHash, null); } else { oldEntry = null; } @@ -554,11 +574,9 @@ public final class plasmaCrawlLURL extends indexURL { urlHashCache.put(entry); serverLog.logFine("PLASMA","STORED new LURL " + url.toString()); this.stored = true; - existsIndex.put(urlHash, Boolean.TRUE); } catch (Exception e) { serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e); } - } } public String hash() { diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index f4323d66e..cb8e6cd9b 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -45,7 +45,6 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; -import java.lang.Boolean; import java.net.URL; import java.util.ArrayList; import java.util.Date; @@ -561,10 +560,7 @@ public class plasmaCrawlNURL extends indexURL { this.flags.getBytes(), normalizeHandle(this.handle).getBytes() }; - synchronized(existsIndex) { - urlHashCache.put(entry); - existsIndex.put(this.hash, Boolean.TRUE); - } + urlHashCache.put(entry); } catch (IOException e) { serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaNURL:store:" + e.toString() + ", resetting NURL-DB"); e.printStackTrace(); @@ -622,23 +618,4 @@ public class plasmaCrawlNURL extends indexURL { } } - /* - public class kenum implements Enumeration { - // enumerates entry elements - kelondroTree.rowIterator i; - public kenum(boolean up, boolean rotating) throws IOException { - i = urlHashCache.rows(up, rotating); - } - public boolean hasMoreElements() { - return i.hasNext(); - } - public Object nextElement() { - return new entry(new String(((byte[][]) i.next())[0])); - } - } - public Enumeration elements(boolean up, boolean rotating) throws IOException { - // enumerates entry elements - return new kenum(up, rotating); - } - */ } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c6fbc7c9c..16067b21c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1024,19 +1024,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // clean up profiles if (cleanProfiles()) hasDoneSomething = true; - // clean up existsIndex - if (urlPool.errorURL.existsIndexSize() > 10000) { - log.logFine("Cleaning Error-URLs exists index, " + urlPool.errorURL.existsIndexSize() + " entries in index"); - urlPool.errorURL.clearExistsIndex(); - } - if (urlPool.noticeURL.existsIndexSize() > 10000) { - log.logFine("Cleaning Notice-URLs exists index, " + urlPool.noticeURL.existsIndexSize() + " entries in index"); - urlPool.noticeURL.clearExistsIndex(); - } - if (urlPool.loadedURL.existsIndexSize() > 100000) { - log.logFine("Cleaning Loaded-URLs exists index, " + urlPool.loadedURL.existsIndexSize() + " entries in index"); - urlPool.loadedURL.clearExistsIndex(); - } // clean up news try { diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 94c17e3ec..336e4fd35 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -64,8 +64,6 @@ import java.net.URL; import java.net.UnknownHostException; import java.nio.channels.ClosedByInterruptException; import java.security.KeyStore; -import java.security.PublicKey; -import java.security.cert.Certificate; import java.util.Enumeration; import java.util.Hashtable; @@ -73,7 +71,6 @@ import javax.net.ssl.HandshakeCompletedEvent; import javax.net.ssl.HandshakeCompletedListener; import javax.net.ssl.KeyManagerFactory; import javax.net.ssl.SSLContext; -import javax.net.ssl.SSLSession; import javax.net.ssl.SSLSocket; import javax.net.ssl.SSLSocketFactory;