- integrated cache miss storage into object cache

- removed cache-miss handling from indexURL
TODO: add new monitoring in PerformanceMemory_p

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2132 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 757ec28430
commit cda087f43b

@ -285,13 +285,13 @@ public class PerformanceMemory_p {
*/
// other caching structures
long amount = sb.urlPool.errorURL.existsIndexSize();
long amount = 0;
prop.put("eurl.existsIndexAmount",Long.toString(amount));
prop.put("eurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));
amount = sb.urlPool.noticeURL.existsIndexSize();
amount = 0;
prop.put("nurl.existsIndexAmount",Long.toString(amount));
prop.put("nurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));
amount = sb.urlPool.loadedURL.existsIndexSize();
amount = 0;
prop.put("lurl.existsIndexAmount",Long.toString(amount));
prop.put("lurl.existsIndexSize",serverMemory.bytesToString(amount*(2*indexURL.urlHashLength+28)));

@ -1,7 +1,6 @@
package de.anomic.index;
import java.io.IOException;
import java.lang.Boolean;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
@ -383,11 +382,9 @@ public class indexURL {
// the class object
protected kelondroTree urlHashCache;
protected final HashMap existsIndex; // allow subclasses to access the existsIndex during Entry.store()
/**
 * Creates an index that is not yet attached to a URL-hash database:
 * {@code urlHashCache} is left {@code null} and an empty exists-index
 * map is allocated.
 */
public indexURL() {
    this.existsIndex = new HashMap();
    this.urlHashCache = null;
}
public int size() {
@ -410,47 +407,6 @@ public class indexURL {
return urlHashCache.cacheObjectStatus();
}
/**
 * Tells whether an entry for the given URL hash is present.
 * <p>
 * The exists-index is consulted first; only when it holds no answer is the
 * URL-hash database queried, and that answer (positive or negative) is then
 * remembered in the exists-index.
 *
 * @param urlHash the URL hash to look up
 * @return {@code true} if an entry was found; {@code false} if none was
 *         found or the database lookup raised an {@link IOException}
 */
public boolean exists(String urlHash) {
    synchronized (existsIndex) {
        final Boolean known = (Boolean) existsIndex.get(urlHash);
        if (known != null) {
            return known.booleanValue();
        }
        boolean found;
        try {
            found = urlHashCache.get(urlHash.getBytes()) != null;
        } catch (IOException e) {
            // a failed lookup is reported as "not found" and is not remembered
            return false;
        }
        existsIndex.put(urlHash, found ? Boolean.TRUE : Boolean.FALSE);
        return found;
    }
}
/**
 * Reports how many URL hashes are currently recorded in the exists-index.
 *
 * @return the number of entries held by the exists-index
 */
public long existsIndexSize() {
    final long entries = existsIndex.size();
    return entries;
}
/**
 * Removes the entry for the given URL hash from the URL-hash database and
 * records a negative answer for it in the exists-index.
 *
 * @param urlHash the URL hash to remove
 * @return {@code true} if the exists-index previously held a positive
 *         answer or the database actually contained the entry;
 *         {@code false} otherwise, or if the database raised an
 *         {@link IOException}
 */
public boolean remove(String urlHash) {
    synchronized (existsIndex) {
        final Boolean indexed = (Boolean) existsIndex.remove(urlHash);
        final boolean wasIndexed = (indexed != null) && indexed.booleanValue();
        boolean wasStored;
        try {
            wasStored = this.urlHashCache.remove(urlHash.getBytes()) != null;
        } catch (IOException e) {
            // on failure the exists-index entry stays removed and nothing is re-recorded
            return false;
        }
        existsIndex.put(urlHash, Boolean.FALSE);
        return wasIndexed || wasStored;
    }
}
/**
 * Discards every answer remembered in the exists-index while holding its
 * monitor.
 */
public void clearExistsIndex() {
    synchronized (this.existsIndex) {
        this.existsIndex.clear();
    }
}
public static final int flagTypeID(String hash) {
return (kelondroBase64Order.enhancedCoder.decodeByte(hash.charAt(11)) & 32) >> 5;

@ -61,18 +61,20 @@ import java.util.TreeMap;
public class kelondroObjectCache {
private final TreeMap cache;
private final kelondroMScoreCluster ages;
private final kelondroMScoreCluster ages, hasnot;
private long startTime;
private int maxSize;
private long maxAge;
private long minMem;
private int readHit, readMiss, writeUnique, writeDouble;
private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble;
private String name;
public kelondroObjectCache(String name, int maxSize, long maxAge, long minMem) {
this.name = name;
this.cache = new TreeMap();
this.ages = new kelondroMScoreCluster();
this.hasnot = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis();
this.maxSize = Math.max(maxSize, 1);
this.maxAge = Math.max(maxAge, 10000);
@ -81,6 +83,10 @@ public class kelondroObjectCache {
this.readMiss = 0;
this.writeUnique = 0;
this.writeDouble = 0;
this.hasnotHit = 0;
this.hasnotMiss = 0;
this.hasnotUnique = 0;
this.hasnotDouble = 0;
}
public String getName() {
@ -170,27 +176,77 @@ public class kelondroObjectCache {
synchronized(cache) {
prev = cache.put(key, value);
ages.setScore(key, intTime(System.currentTimeMillis()));
hasnot.deleteScore(key);
}
if (prev == null) this.writeUnique++; else this.writeDouble++;
flush();
flushc();
}
/**
 * Looks up the object cached under the given key.
 * <p>
 * The key is converted to a String once, because the cache map and the
 * {@code hasnot} score cluster are addressed with String keys everywhere
 * else in this class. After the lookup {@code flushc()} is run and the
 * read statistics are updated exactly once; on a hit any "has not" marker
 * for the key is dropped.
 *
 * @param key the raw key bytes; {@code null} yields {@code null}
 * @return the cached object, or {@code null} if nothing is cached
 */
public Object get(byte[] key) {
    if (key == null) return null;
    final String skey = new String(key);
    Object r = cache.get(skey);
    flushc();
    if (r == null) {
        this.readMiss++;
    } else {
        // must be the String form: a byte[] never matches the stored keys
        hasnot.deleteScore(skey);
        this.readHit++;
    }
    return r;
}
/**
 * Looks up the object cached under the given key.
 * <p>
 * After the lookup {@code flushc()} is run and the read statistics are
 * updated exactly once; on a hit any "has not" marker for the key is
 * dropped.
 *
 * @param key the key; {@code null} yields {@code null}
 * @return the cached object, or {@code null} if nothing is cached
 */
public Object get(String key) {
    if (key == null) return null;
    Object r = cache.get(key);
    flushc();
    if (r == null) {
        this.readMiss++;
    } else {
        hasnot.deleteScore(key);
        this.readHit++;
    }
    return r;
}
/**
 * Byte-array convenience form of {@link #hasnot(String)}.
 *
 * @param key the raw key bytes; {@code null} is ignored, matching the
 *            String overload, instead of failing inside
 *            {@code new String(key)}
 */
public void hasnot(byte[] key) {
    if (key == null) return;
    hasnot(new String(key));
}
/**
 * Records that no object exists for the given key.
 * <p>
 * Any cached object and its age score are dropped, the key is stamped with
 * the current time in the {@code hasnot} cluster, the unique/double write
 * counters are updated and {@code flushh()} is run.
 *
 * @param key the key known to be absent; {@code null} is ignored
 */
public void hasnot(String key) {
    if (key == null) {
        return;
    }
    final int previousScore;
    synchronized (cache) {
        cache.remove(key);
        ages.deleteScore(key);
        previousScore = hasnot.getScore(key);
        hasnot.setScore(key, intTime(System.currentTimeMillis()));
    }
    // a score of 0 means the key had no marker before this call
    if (previousScore != 0) {
        this.hasnotDouble++;
    } else {
        this.hasnotUnique++;
    }
    flushh();
}
/**
 * Byte-array convenience form of {@link #has(String)}.
 *
 * @param key the raw key bytes
 * @return 1, -1 or 0 as described for the String overload; a {@code null}
 *         key yields 0 (unknown), matching the String overload, instead of
 *         failing inside {@code new String(key)}
 */
public int has(byte[] key) {
    if (key == null) return 0;
    return has(new String(key));
}
/**
 * Answers a three-valued existence question for the given key.
 *
 * @param key the key to test; {@code null} yields 0
 * @return  1 if the key definitely exists (an object is cached for it),
 *         -1 if the key definitely does not exist (it carries a positive
 *            score in the {@code hasnot} cluster),
 *          0 if it is unknown whether the key exists
 */
public int has(String key) {
    if (key == null) {
        return 0;
    }
    synchronized (cache) {
        final boolean markedMissing = hasnot.getScore(key) > 0;
        if (markedMissing) {
            this.hasnotHit++;
            return -1;
        }
        this.hasnotMiss++;
        final Object cached = cache.get(key);
        if (cached != null) {
            return 1;
        }
    }
    // only the "unknown" outcome runs the hasnot flush
    flushh();
    return 0;
}
/**
 * Byte-array convenience form of {@code remove(String)}: the bytes are
 * turned into a String and handed to the String overload.
 *
 * @param key the raw key bytes
 */
public void remove(byte[] key) {
    final String skey = new String(key);
    remove(skey);
}
@ -200,34 +256,37 @@ public class kelondroObjectCache {
synchronized(cache) {
cache.remove(key);
ages.deleteScore(key);
hasnot.setScore(key, intTime(System.currentTimeMillis()));
}
flush();
}
public void flush() {
public void flushc() {
String k;
synchronized(cache) {
while ((ages.size() > 0) &&
((k = bestFlush()) != null) &&
((size() > maxSize) ||
((k = (String) ages.getMinObject()) != null) &&
((ages.size() > maxSize) ||
((System.currentTimeMillis() - longEmit(ages.getScore(k))) > maxAge) ||
(Runtime.getRuntime().freeMemory() < minMem))
) {
cache.remove(k);
ages.deleteScore(k);
//if (Runtime.getRuntime().freeMemory() < minMem) System.gc(); // prevent unnecessary loops
}
}
}
public String bestFlush() {
if (cache.size() == 0) return null;
try {
synchronized (cache) {
return (String) ages.getMinObject(); // flush oldest entries
}
} catch (Exception e) {}
return null;
/**
 * Trims the {@code hasnot} cluster.
 * <p>
 * While holding the cache monitor, the entry returned by
 * {@code hasnot.getMinObject()} is deleted repeatedly for as long as the
 * cluster holds more than {@code maxSize} entries, that entry is older than
 * {@code maxAge}, or free memory is below {@code minMem}.
 */
public void flushh() {
    synchronized (cache) {
        while (hasnot.size() > 0) {
            final String oldest = (String) hasnot.getMinObject();
            if (oldest == null) {
                break;
            }
            // conditions are evaluated left to right and short-circuit
            final boolean mustDrop =
                    (hasnot.size() > maxSize)
                    || ((System.currentTimeMillis() - longEmit(hasnot.getScore(oldest))) > maxAge)
                    || (Runtime.getRuntime().freeMemory() < minMem);
            if (!mustDrop) {
                break;
            }
            hasnot.deleteScore(oldest);
        }
    }
}
}

@ -228,6 +228,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
//System.out.println("cache hit in objectCache, db:" + super.filename);
return result;
}
if ((objectCache != null) && (objectCache.has(key) == -1)) return null;
synchronized (writeSearchObj) {
writeSearchObj.process(key);
if (writeSearchObj.found()) {
@ -235,6 +236,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
if (objectCache != null) objectCache.put(key, result);
} else {
result = null;
if (objectCache != null) objectCache.hasnot(key);
}
}
return result;

@ -45,7 +45,6 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.lang.Boolean;
import java.net.URL;
import java.util.Date;
import java.util.Enumeration;
@ -118,6 +117,22 @@ public class plasmaCrawlEURL extends indexURL {
return new Entry(hash);
}
/**
 * Tells whether the URL-hash database holds an entry for the given hash.
 *
 * @param urlHash the URL hash to look up
 * @return {@code true} if an entry was found; {@code false} if none was
 *         found or the lookup raised an {@link IOException}
 */
public boolean exists(String urlHash) {
    boolean found;
    try {
        found = urlHashCache.get(urlHash.getBytes()) != null;
    } catch (IOException e) {
        found = false;
    }
    return found;
}
/**
 * Removes the entry for the given hash from the URL-hash database.
 *
 * @param urlHash the URL hash to remove
 * @return {@code true} if an entry was actually removed; {@code false} if
 *         there was none or the removal raised an {@link IOException}
 */
public boolean remove(String urlHash) {
    boolean removed;
    try {
        removed = this.urlHashCache.remove(urlHash.getBytes()) != null;
    } catch (IOException e) {
        removed = false;
    }
    return removed;
}
/**
 * Empties the {@code rejectedStack}.
 */
public void clearStack()
{
    rejectedStack.clear();
}
@ -207,11 +222,8 @@ public class plasmaCrawlEURL extends indexURL {
kelondroBase64Order.enhancedCoder.encodeLong(this.trycount, urlRetryLength).getBytes(),
this.failreason.getBytes(),
this.flags.getBytes()
};
synchronized(existsIndex) {
urlHashCache.put(entry);
existsIndex.put(this.hash, Boolean.TRUE);
}
};
urlHashCache.put(entry);
} catch (IOException e) {
System.out.println("INTERNAL ERROR AT plasmaEURL:url2hash:" + e.toString());
}

@ -54,7 +54,6 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.lang.Boolean;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
@ -270,19 +269,41 @@ public final class plasmaCrawlLURL extends indexURL {
}
// Scans stacks 1..6 from the top index downwards for an entry whose URL hash
// equals the given one and removes that stack entry.
// NOTE(review): this span carries two variants of the body interleaved (a
// changeset view without +/- markers): one that starts from
// super.remove(urlHash) and ORs in the result of removeStack(stack,i), and one
// that returns removeStack(stack,i) directly and false when nothing matched.
// As written, the second return inside the if-block and the final
// `return false;` are unreachable. Confirm which variant is intended; note that
// only the super.remove variant touches anything besides the stacks.
public boolean remove(String urlHash) {
boolean exists1 = super.remove(urlHash);
for (int stack = 1; stack <= 6; stack++) {
for (int i = getStackSize(stack) - 1; i >= 0; i--) {
if (getUrlHash(stack,i).equals(urlHash)) {
boolean exits2 = removeStack(stack,i);
exists1 = exists1 || exits2;
return exists1;
return removeStack(stack,i);
}
}
}
return exists1;
return false;
}
/**
 * Tells whether the URL-hash database holds an entry for the given hash.
 *
 * @param urlHash the URL hash to look up
 * @return {@code true} if an entry was found; {@code false} if none was
 *         found or the lookup raised an {@link IOException}
 */
public boolean exists(String urlHash) {
    try {
        return urlHashCache.get(urlHash.getBytes()) != null;
    } catch (IOException e) {
        return false;
    }
}
/*
public long existsIndexSize() {
return this.existsIndex.size();
}
public void clearExistsIndex() {
synchronized (existsIndex) {
existsIndex.clear();
}
}
*/
private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
private static String daydate(Date date) {
if (date == null) {
@ -498,11 +519,10 @@ public final class plasmaCrawlLURL extends indexURL {
public void store() {
// Check if there is a more recent Entry already in the DB
if (this.stored) return;
synchronized(existsIndex) {
Entry oldEntry;
try {
if (exists(urlHash)) {
oldEntry = new Entry (urlHash, null);
oldEntry = new Entry(urlHash, null);
} else {
oldEntry = null;
}
@ -554,11 +574,9 @@ public final class plasmaCrawlLURL extends indexURL {
urlHashCache.put(entry);
serverLog.logFine("PLASMA","STORED new LURL " + url.toString());
this.stored = true;
existsIndex.put(urlHash, Boolean.TRUE);
} catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaCrawlLURL:store:" + e.toString(), e);
}
}
}
public String hash() {

@ -45,7 +45,6 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.lang.Boolean;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
@ -561,10 +560,7 @@ public class plasmaCrawlNURL extends indexURL {
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
};
synchronized(existsIndex) {
urlHashCache.put(entry);
existsIndex.put(this.hash, Boolean.TRUE);
}
urlHashCache.put(entry);
} catch (IOException e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR AT plasmaNURL:store:" + e.toString() + ", resetting NURL-DB");
e.printStackTrace();
@ -622,23 +618,4 @@ public class plasmaCrawlNURL extends indexURL {
}
}
/*
public class kenum implements Enumeration {
// enumerates entry elements
kelondroTree.rowIterator i;
public kenum(boolean up, boolean rotating) throws IOException {
i = urlHashCache.rows(up, rotating);
}
public boolean hasMoreElements() {
return i.hasNext();
}
public Object nextElement() {
return new entry(new String(((byte[][]) i.next())[0]));
}
}
public Enumeration elements(boolean up, boolean rotating) throws IOException {
// enumerates entry elements
return new kenum(up, rotating);
}
*/
}

@ -1024,19 +1024,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// clean up profiles
if (cleanProfiles()) hasDoneSomething = true;
// clean up existsIndex
if (urlPool.errorURL.existsIndexSize() > 10000) {
log.logFine("Cleaning Error-URLs exists index, " + urlPool.errorURL.existsIndexSize() + " entries in index");
urlPool.errorURL.clearExistsIndex();
}
if (urlPool.noticeURL.existsIndexSize() > 10000) {
log.logFine("Cleaning Notice-URLs exists index, " + urlPool.noticeURL.existsIndexSize() + " entries in index");
urlPool.noticeURL.clearExistsIndex();
}
if (urlPool.loadedURL.existsIndexSize() > 100000) {
log.logFine("Cleaning Loaded-URLs exists index, " + urlPool.loadedURL.existsIndexSize() + " entries in index");
urlPool.loadedURL.clearExistsIndex();
}
// clean up news
try {

@ -64,8 +64,6 @@ import java.net.URL;
import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException;
import java.security.KeyStore;
import java.security.PublicKey;
import java.security.cert.Certificate;
import java.util.Enumeration;
import java.util.Hashtable;
@ -73,7 +71,6 @@ import javax.net.ssl.HandshakeCompletedEvent;
import javax.net.ssl.HandshakeCompletedListener;
import javax.net.ssl.KeyManagerFactory;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory;

Loading…
Cancel
Save