- integrated cache-miss storage into the object cache

- removed cache-miss handling from indexURL
todo: add new monitoring to PerformanceMemory_p (the existsIndex figures are set to 0 for now)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2132 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 757ec28430
commit cda087f43b

@ -285,13 +285,13 @@ public class PerformanceMemory_p {
*/ */
// other caching structures // other caching structures
long amount = sb.urlPool.errorURL.existsIndexSize(); long amount = 0;
prop.put("eurl.existsIndexAmount",Long.toString(amount)); prop.put("eurl.existsIndexAmount",Long.toString(amount));
prop.put("eurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); prop.put("eurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));
amount = sb.urlPool.noticeURL.existsIndexSize(); amount = 0;
prop.put("nurl.existsIndexAmount",Long.toString(amount)); prop.put("nurl.existsIndexAmount",Long.toString(amount));
prop.put("nurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); prop.put("nurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));
amount = sb.urlPool.loadedURL.existsIndexSize(); amount = 0;
prop.put("lurl.existsIndexAmount",Long.toString(amount)); prop.put("lurl.existsIndexAmount",Long.toString(amount));
prop.put("lurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28))); prop.put("lurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));

@ -1,7 +1,6 @@
package de.anomic.index; package de.anomic.index;
import java.io.IOException; import java.io.IOException;
import java.lang.Boolean;
import java.net.URL; import java.net.URL;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
@ -383,11 +382,9 @@ public class indexURL {
// the class object // the class object
protected kelondroTree urlHashCache; protected kelondroTree urlHashCache;
protected final HashMap existsIndex; // allow subclasses to access the existsIndex during Entry.store()
public indexURL() { public indexURL() {
urlHashCache = null; urlHashCache = null;
existsIndex = new HashMap();
} }
public int size() { public int size() {
@ -410,47 +407,6 @@ public class indexURL {
return urlHashCache.cacheObjectStatus(); return urlHashCache.cacheObjectStatus();
} }
/**
 * Tells whether an entry for the given url hash is stored.
 *
 * The answer is looked up in <code>existsIndex</code> first; only when no
 * answer is memorised there is <code>urlHashCache</code> consulted, and the
 * outcome (positive or negative) is then memorised in <code>existsIndex</code>.
 *
 * @param urlHash the url hash to look for
 * @return <code>true</code> if an entry was found, <code>false</code> if not
 *         or if the lookup in <code>urlHashCache</code> raised an IOException
 */
public boolean exists(String urlHash) {
    synchronized (existsIndex) {
        Boolean memorised = (Boolean) existsIndex.get(urlHash);
        if (memorised != null) {
            return memorised.booleanValue();
        }

        boolean found;
        try {
            found = (urlHashCache.get(urlHash.getBytes()) != null);
        } catch (IOException e) {
            // a failed lookup is reported as "not found" and is not memorised
            return false;
        }
        existsIndex.put(urlHash, found ? Boolean.TRUE : Boolean.FALSE);
        return found;
    }
}
/**
 * @return the number of entries currently held in <code>existsIndex</code>
 */
public long existsIndexSize() {
    final int entries = existsIndex.size();
    return entries;
}
/**
 * Removes the entry for the given url hash from <code>urlHashCache</code>
 * and memorises in <code>existsIndex</code> that the hash no longer exists.
 *
 * @param urlHash the url hash to remove
 * @return <code>true</code> if <code>existsIndex</code> had memorised the hash
 *         as existing or <code>urlHashCache</code> actually held an entry;
 *         <code>false</code> otherwise, or if the removal raised an IOException
 */
public boolean remove(String urlHash) {
    synchronized (existsIndex) {
        // the memorised answer is taken out first; a missing answer counts as "did not exist"
        final Boolean memorised = (Boolean) existsIndex.remove(urlHash);
        final boolean knownBefore = (memorised != null) && memorised.booleanValue();

        boolean storedBefore;
        try {
            storedBefore = (this.urlHashCache.remove(urlHash.getBytes()) != null);
        } catch (IOException e) {
            // on failure nothing is memorised for this hash
            return false;
        }

        existsIndex.put(urlHash, Boolean.FALSE);
        return knownBefore || storedBefore;
    }
}
/**
 * Drops every memorised answer from <code>existsIndex</code>.
 * The map itself is used as the lock while it is cleared.
 */
public void clearExistsIndex() {
    synchronized (this.existsIndex) {
        this.existsIndex.clear();
    }
}
public static final int flagTypeID(String hash) { public static final int flagTypeID(String hash) {
return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5; return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5;

@ -61,18 +61,20 @@ import java.util.TreeMap;
public class kelondroObjectCache { public class kelondroObjectCache {
private final TreeMap cache; private final TreeMap cache;
private final kelondroMScoreCluster ages; private final kelondroMScoreCluster ages, hasnot;
private long startTime; private long startTime;
private int maxSize; private int maxSize;
private long maxAge; private long maxAge;
private long minMem; private long minMem;
private int readHit, readMiss, writeUnique, writeDouble; private int readHit, readMiss, writeUnique, writeDouble;
private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble;
private String name; private String name;
public kelondroObjectCache(String name, int maxSize, long maxAge, long minMem) { public kelondroObjectCache(String name, int maxSize, long maxAge, long minMem) {
this.name = name; this.name = name;
this.cache = new TreeMap(); this.cache = new TreeMap();
this.ages = new kelondroMScoreCluster(); this.ages = new kelondroMScoreCluster();
this.hasnot = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis(); this.startTime = System.currentTimeMillis();
this.maxSize = Math.max(maxSize, 1); this.maxSize = Math.max(maxSize, 1);
this.maxAge = Math.max(maxAge, 10000); this.maxAge = Math.max(maxAge, 10000);
@ -81,6 +83,10 @@ public class kelondroObjectCache {
this.readMiss = 0; this.readMiss = 0;
this.writeUnique = 0; this.writeUnique = 0;
this.writeDouble = 0; this.writeDouble = 0;
this.hasnotHit = 0;
this.hasnotMiss = 0;
this.hasnotUnique = 0;
this.hasnotDouble = 0;
} }
public String getName() { public String getName() {
@ -170,27 +176,77 @@ public class kelondroObjectCache {
synchronized(cache) { synchronized(cache) {
prev = cache.put(key, value); prev = cache.put(key, value);
ages.setScore(key, intTime(System.currentTimeMillis())); ages.setScore(key, intTime(System.currentTimeMillis()));
hasnot.deleteScore(key);
} }
if (prev == null) this.writeUnique++; else this.writeDouble++; if (prev == null) this.writeUnique++; else this.writeDouble++;
flush(); flushc();
} }
public Object get(byte[] key) { public Object get(byte[] key) {
if (key == null) return null; if (key == null) return null;
Object r = cache.get(new String(key)); Object r = cache.get(new String(key));
flush(); flushc();
if (r == null) this.readMiss++; else this.readHit++; if (r == null) {
this.readMiss++;
} else {
hasnot.deleteScore(key);
this.readHit++;
}
return r; return r;
} }
public Object get(String key) { public Object get(String key) {
if (key == null) return null; if (key == null) return null;
Object r = cache.get(key); Object r = cache.get(key);
flush(); flushc();
if (r == null) this.readMiss++; else this.readHit++; if (r == null) {
this.readMiss++;
} else {
hasnot.deleteScore(key);
this.readHit++;
}
return r; return r;
} }
/**
 * Byte-array variant of {@link #hasnot(String)}: records that the given key
 * is known not to exist.
 *
 * A <code>null</code> key is ignored, exactly as the String variant ignores it;
 * without this guard <code>new String(key)</code> would throw a
 * NullPointerException.
 *
 * @param key the key as bytes; converted with <code>new String(byte[])</code>
 */
public void hasnot(byte[] key) {
    if (key == null) return;
    hasnot(new String(key));
}
/**
 * Records that the given key is known not to exist.
 *
 * Under the <code>cache</code> lock the key is taken out of <code>cache</code>
 * and <code>ages</code>, and its score in the <code>hasnot</code> cluster is set
 * to the current time (as produced by <code>intTime</code>). Afterwards the
 * <code>hasnotUnique</code> or <code>hasnotDouble</code> counter is advanced,
 * depending on whether the key had a zero score before, and
 * <code>flushh()</code> is run.
 *
 * @param key the key to mark as missing; <code>null</code> is ignored
 */
public void hasnot(String key) {
    if (key == null) {
        return;
    }

    int previousScore;
    synchronized (cache) {
        cache.remove(key);
        ages.deleteScore(key);
        previousScore = hasnot.getScore(key);
        hasnot.setScore(key, intTime(System.currentTimeMillis()));
    }

    if (previousScore != 0) {
        this.hasnotDouble++;
    } else {
        this.hasnotUnique++;
    }
    flushh();
}
/**
 * Byte-array variant of {@link #has(String)}.
 *
 * A <code>null</code> key yields 0 ("unknown"), the same answer the String
 * variant gives for <code>null</code>; without this guard
 * <code>new String(key)</code> would throw a NullPointerException.
 *
 * @param key the key as bytes; converted with <code>new String(byte[])</code>
 * @return 1, -1 or 0 as described for {@link #has(String)}
 */
public int has(byte[] key) {
    if (key == null) return 0;
    return has(new String(key));
}
/**
 * Three-valued existence check for a key.
 *
 * <ul>
 * <li> 1 = key definitely exists (<code>cache</code> holds a non-null value for it)</li>
 * <li>-1 = key definitely does not exist (it has a positive score in <code>hasnot</code>)</li>
 * <li> 0 = unknown, if key exists</li>
 * </ul>
 *
 * The <code>hasnot</code> cluster is consulted before <code>cache</code>.
 * <code>hasnotHit</code> is advanced for a -1 answer, <code>hasnotMiss</code>
 * for every other non-null key. <code>flushh()</code> is only run on the
 * "unknown" path.
 *
 * @param key the key to check; <code>null</code> yields 0
 * @return 1, -1 or 0 as listed above
 */
public int has(String key) {
    if (key == null) {
        return 0;
    }

    boolean cached;
    synchronized (cache) {
        final boolean knownMissing = (hasnot.getScore(key) > 0);
        if (knownMissing) {
            this.hasnotHit++;
            return -1;
        }
        this.hasnotMiss++;
        cached = (cache.get(key) != null);
    }

    if (cached) {
        return 1;
    }
    flushh();
    return 0;
}
public void remove(byte[] key) { public void remove(byte[] key) {
remove(new String(key)); remove(new String(key));
} }
@ -200,34 +256,37 @@ public class kelondroObjectCache {
synchronized(cache) { synchronized(cache) {
cache.remove(key); cache.remove(key);
ages.deleteScore(key); ages.deleteScore(key);
hasnot.setScore(key, intTime(System.currentTimeMillis()));
} }
flush();
} }
public void flush() { public void flushc() {
String k; String k;
synchronized(cache) { synchronized(cache) {
while ((ages.size() > 0) && while ((ages.size() > 0) &&
((k = bestFlush()) != null) && ((k = (String) ages.getMinObject()) != null) &&
((size() > maxSize) || ((ages.size() > maxSize) ||
((System.currentTimeMillis() - longEmit(ages.getScore(k))) > maxAge) || ((System.currentTimeMillis() - longEmit(ages.getScore(k))) > maxAge) ||
(Runtime.getRuntime().freeMemory() < minMem)) (Runtime.getRuntime().freeMemory() < minMem))
) { ) {
cache.remove(k); cache.remove(k);
ages.deleteScore(k); ages.deleteScore(k);
//if (Runtime.getRuntime().freeMemory() < minMem) System.gc(); // prevent unnecessary loops
} }
} }
} }
public String bestFlush() { public void flushh() {
if (cache.size() == 0) return null; String k;
try { synchronized(cache) {
synchronized (cache) { while ((hasnot.size() > 0) &&
return (String) ages.getMinObject(); // flush oldest entries ((k = (String) hasnot.getMinObject()) != null) &&
} ((hasnot.size() > maxSize) ||
} catch (Exception e) {} ((System.currentTimeMillis() - longEmit(hasnot.getScore(k))) > maxAge) ||
return null; (Runtime.getRuntime().freeMemory() < minMem))
) {
hasnot.deleteScore(k);
}
}
} }
} }

@ -228,6 +228,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
//System.out.println("cache hit in objectCache, db:" + super.filename); //System.out.println("cache hit in objectCache, db:" + super.filename);
return result; return result;
} }
if ((objectCache != null) && (objectCache.has(key) == -1)) return null;
synchronized (writeSearchObj) { synchronized (writeSearchObj) {
writeSearchObj.process(key); writeSearchObj.process(key);
if (writeSearchObj.found()) { if (writeSearchObj.found()) {
@ -235,6 +236,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
if (objectCache != null) objectCache.put(key, result); if (objectCache != null) objectCache.put(key, result);
} else { } else {
result = null; result = null;
if (objectCache != null) objectCache.hasnot(key);
} }
} }
return result; return result;

@ -45,7 +45,6 @@ package de.anomic.plasma;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.Boolean;
import java.net.URL; import java.net.URL;
import java.util.Date; import java.util.Date;
import java.util.Enumeration; import java.util.Enumeration;
@ -118,6 +117,22 @@ public class plasmaCrawlEURL extends indexURL {
return new Entry(hash); return new Entry(hash);
} }
/**
 * Tells whether <code>urlHashCache</code> holds an entry for the given url hash.
 *
 * @param urlHash the url hash to look for
 * @return <code>true</code> if the lookup returned a non-null result;
 *         <code>false</code> otherwise, or if the lookup raised an IOException
 */
public boolean exists(String urlHash) {
    final byte[] hashBytes = urlHash.getBytes();
    try {
        final Object row = urlHashCache.get(hashBytes);
        return row != null;
    } catch (IOException e) {
        return false;
    }
}
/**
 * Removes the entry for the given url hash from <code>urlHashCache</code>.
 *
 * @param urlHash the url hash to remove
 * @return <code>true</code> if the removal returned a non-null result;
 *         <code>false</code> otherwise, or if the removal raised an IOException
 */
public boolean remove(String urlHash) {
    final byte[] hashBytes = urlHash.getBytes();
    try {
        final Object removedRow = this.urlHashCache.remove(hashBytes);
        return removedRow != null;
    } catch (IOException e) {
        return false;
    }
}
public void clearStack() { public void clearStack() {
rejectedStack.clear(); rejectedStack.clear();
} }
@ -207,11 +222,8 @@ public class plasmaCrawlEURL extends indexURL {
kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, urlRetryLength).getBytes(), kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, urlRetryLength).getBytes(),
this.failreason.getBytes(), this.failreason.getBytes(),
this.flags.getBytes() this.flags.getBytes()
}; };
synchronized(existsIndex) { urlHashCache.put(entry);
urlHashCache.put(entry);
existsIndex.put(this.hash, Boolean.TRUE);
}
} catch (IOException e) { } catch (IOException e) {
System.out.println("INTERNAL ERROR AT plasmaEURL:url2hash:" + e.toString()); System.out.println("INTERNAL ERROR AT plasmaEURL:url2hash:" + e.toString());
} }

@ -54,7 +54,6 @@ package de.anomic.plasma;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.Boolean;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
@ -270,19 +269,41 @@ public final class plasmaCrawlLURL extends indexURL {
} }
public boolean remove(String urlHash) { public boolean remove(String urlHash) {
boolean exists1 = super.remove(urlHash);
for (int stack = 1; stack <= 6; stack++) { for (int stack = 1; stack <= 6; stack++) {
for (int i = getStackSize(stack) - 1; i >= 0; i--) { for (int i = getStackSize(stack) - 1; i >= 0; i--) {
if (getUrlHash(stack,i).equals(urlHash)) { if (getUrlHash(stack,i).equals(urlHash)) {
boolean exits2 = removeStack(stack,i); return removeStack(stack,i);
exists1 = exists1 || exits2;
return exists1;
} }
} }
} }
return exists1; return false;
} }
/**
 * Tells whether <code>urlHashCache</code> holds an entry for the given url hash.
 *
 * @param urlHash the url hash to look for
 * @return <code>true</code> if the lookup returned a non-null result;
 *         <code>false</code> otherwise, or if the lookup raised an IOException
 */
public boolean exists(String urlHash) {
    try {
        // the comparison already yields the boolean answer
        return urlHashCache.get(urlHash.getBytes()) != null;
    } catch (IOException e) {
        return false;
    }
}
/*
public long existsIndexSize() {
return this.existsIndex.size();
}
public void clearExistsIndex() {
synchronized (existsIndex) {
existsIndex.clear();
}
}
*/
private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
private static String daydate(Date date) { private static String daydate(Date date) {
if (date == null) { if (date == null) {
@ -498,11 +519,10 @@ public final class plasmaCrawlLURL extends indexURL {
public void store() { public void store() {
// Check if there is a more recent Entry already in the DB // Check if there is a more recent Entry already in the DB
if (this.stored) return; if (this.stored) return;
synchronized(existsIndex) {
Entry oldEntry; Entry oldEntry;
try { try {
if (exists(urlHash)) { if (exists(urlHash)) {
oldEntry = new Entry (urlHash, null); oldEntry = new Entry(urlHash, null);
} else { } else {
oldEntry = null; oldEntry = null;
} }
@ -554,11 +574,9 @@ public final class plasmaCrawlLURL extends indexURL {
urlHashCache.put(entry); urlHashCache.put(entry);
serverLog.logFine("PLASMA","STORED new LURL " + url.toString()); serverLog.logFine("PLASMA","STORED new LURL " + url.toString());
this.stored = true; this.stored = true;
existsIndex.put(urlHash, Boolean.TRUE);
} catch (Exception e) { } catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e); serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e);
} }
}
} }
public String hash() { public String hash() {

@ -45,7 +45,6 @@ package de.anomic.plasma;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.Boolean;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
@ -561,10 +560,7 @@ public class plasmaCrawlNURL extends indexURL {
this.flags.getBytes(), this.flags.getBytes(),
normalizeHandle(this.handle).getBytes() normalizeHandle(this.handle).getBytes()
}; };
synchronized(existsIndex) { urlHashCache.put(entry);
urlHashCache.put(entry);
existsIndex.put(this.hash, Boolean.TRUE);
}
} catch (IOException e) { } catch (IOException e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaNURL:store:" + e.toString() + ", resetting NURL-DB"); serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaNURL:store:" + e.toString() + ", resetting NURL-DB");
e.printStackTrace(); e.printStackTrace();
@ -622,23 +618,4 @@ public class plasmaCrawlNURL extends indexURL {
} }
} }
/*
public class kenum implements Enumeration {
// enumerates entry elements
kelondroTree.rowIterator i;
public kenum(boolean up, boolean rotating) throws IOException {
i = urlHashCache.rows(up, rotating);
}
public boolean hasMoreElements() {
return i.hasNext();
}
public Object nextElement() {
return new entry(new String(((byte[][]) i.next())[0]));
}
}
public Enumeration elements(boolean up, boolean rotating) throws IOException {
// enumerates entry elements
return new kenum(up, rotating);
}
*/
} }

@ -1024,19 +1024,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
// clean up profiles // clean up profiles
if (cleanProfiles()) hasDoneSomething = true; if (cleanProfiles()) hasDoneSomething = true;
// clean up existsIndex
if (urlPool.errorURL.existsIndexSize() > 10000) {
log.logFine("Cleaning Error-URLs exists index, " + urlPool.errorURL.existsIndexSize() + " entries in index");
urlPool.errorURL.clearExistsIndex();
}
if (urlPool.noticeURL.existsIndexSize() > 10000) {
log.logFine("Cleaning Notice-URLs exists index, " + urlPool.noticeURL.existsIndexSize() + " entries in index");
urlPool.noticeURL.clearExistsIndex();
}
if (urlPool.loadedURL.existsIndexSize() > 100000) {
log.logFine("Cleaning Loaded-URLs exists index, " + urlPool.loadedURL.existsIndexSize() + " entries in index");
urlPool.loadedURL.clearExistsIndex();
}
// clean up news // clean up news
try { try {

@ -64,8 +64,6 @@ import java.net.URL;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException; import java.nio.channels.ClosedByInterruptException;
import java.security.KeyStore; import java.security.KeyStore;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Hashtable; import java.util.Hashtable;
@ -73,7 +71,6 @@ import javax.net.ssl.HandshakeCompletedEvent;
import javax.net.ssl.HandshakeCompletedListener; import javax.net.ssl.HandshakeCompletedListener;
import javax.net.ssl.KeyManagerFactory; import javax.net.ssl.KeyManagerFactory;
import javax.net.ssl.SSLContext; import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocket; import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.SSLSocketFactory;

Loading…
Cancel
Save