Added automatic cleaning of the cache if the metadata and the file database
sizes are not equal. This data can differ because one of the caches is
cleaned after a while or when it grows too big. The metadata was previously
never cleaned, but is now wiped by a checkup process at every application
start. This should result in slightly lower memory usage.
pull/1/head
Michael Peter Christen 13 years ago
parent d0ec8018f5
commit 3dd8376825

@ -32,7 +32,6 @@ import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables; import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -62,7 +61,7 @@ public class ConfigHTCache_p {
// proxyCacheSize // proxyCacheSize
final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4); final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4);
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize); env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024); Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
} }
if (post != null && post.containsKey("deletecomplete")) { if (post != null && post.containsKey("deletecomplete")) {

@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
@ -47,6 +48,7 @@ import net.yacy.kelondro.blob.Compressor;
import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
@ -57,7 +59,7 @@ public final class Cache {
private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap"; private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap";
private static final String FILE_DB_NAME = "file.array"; private static final String FILE_DB_NAME = "file.array";
private static Map<byte[], Map<String, String>> responseHeaderDB = null; private static MapHeap responseHeaderDB = null;
private static Compressor fileDB = null; private static Compressor fileDB = null;
private static ArrayStack fileDBunbuffered = null; private static ArrayStack fileDBunbuffered = null;
@ -84,6 +86,7 @@ public final class Cache {
} catch (final IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
// open the cache file
try { try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false); fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false);
fileDBunbuffered.setMaxSize(maxCacheSize); fileDBunbuffered.setMaxSize(maxCacheSize);
@ -91,6 +94,53 @@ public final class Cache {
} catch (final IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
} }
Log.logInfo("Cache", "initialized cache database responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
// clean up the responseHeaderDB which cannot be cleaned the same way as the cache files.
// We do this as a concurrent job only once after start-up silently
if (responseHeaderDB.size() != fileDB.size()) {
Log.logWarning("Cache", "file and metadata size is not equal, starting a cleanup thread...");
Thread startupCleanup = new Thread() {
@Override
public void run() {
// enumerate the responseHeaderDB and find out all entries that are not inside the fileDBunbuffered
BlockingQueue<byte[]> q = responseHeaderDB.keyQueue(1000);
final HandleSet delkeys = new HandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 1);
Log.logInfo("Cache", "started cleanup thread to remove unused cache metadata");
try {
byte[] k;
while (((k = q.take()) != MapHeap.POISON_QUEUE_ENTRY)) {
if (!fileDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
}
} catch (InterruptedException e) {
} finally {
// delete the collected keys from the metadata
Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused metadata entries; now deleting them from the file...");
for (byte[] k: delkeys) {
try {
responseHeaderDB.delete(k);
} catch (IOException e) {
}
}
}
Log.logInfo("Cache", "running check to remove unused file cache data");
delkeys.clear();
for (byte[] k: fileDB) {
if (!responseHeaderDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
}
Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused cache entries; now deleting them from the file...");
for (byte[] k: delkeys) {
try {
fileDB.delete(k);
} catch (IOException e) {
}
}
Log.logInfo("Cache", "terminated cleanup thread; responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
}
};
startupCleanup.start();
}
} }
/** /**
@ -131,9 +181,7 @@ public final class Cache {
* close the databases * close the databases
*/ */
public static void close() { public static void close() {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.close();
((MapHeap) responseHeaderDB).close();
}
fileDB.close(true); fileDB.close(true);
} }
@ -156,12 +204,9 @@ public final class Cache {
hm.putAll(responseHeader); hm.putAll(responseHeader);
hm.put("@@URL", url.toNormalform(true, false)); hm.put("@@URL", url.toNormalform(true, false));
try { try {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.insert(url.hash(), hm);
((MapHeap) responseHeaderDB).insert(url.hash(), hm);
} else {
responseHeaderDB.put(url.hash(), hm);
}
} catch (final Exception e) { } catch (final Exception e) {
fileDB.delete(url.hash());
throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage()); throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
} }
if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false)); if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
@ -184,11 +229,7 @@ public final class Cache {
// if not both is there then we do a clean-up // if not both is there then we do a clean-up
if (headerExists) try { if (headerExists) try {
log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up"); log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up");
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.delete(urlhash);
((MapHeap) responseHeaderDB).delete(urlhash);
} else {
responseHeaderDB.remove(urlhash);
}
} catch (final IOException e) {} } catch (final IOException e) {}
if (fileExists) try { if (fileExists) try {
//log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up"); //log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up");
@ -209,8 +250,14 @@ public final class Cache {
public static ResponseHeader getResponseHeader(final byte[] hash) { public static ResponseHeader getResponseHeader(final byte[] hash) {
// loading data from database // loading data from database
Map<String, String> hdb; Map<String, String> hdb = null;
try {
hdb = responseHeaderDB.get(hash); hdb = responseHeaderDB.get(hash);
} catch (IOException e) {
return null;
} catch (RowSpaceExceededException e) {
return null;
}
if (hdb == null) return null; if (hdb == null) return null;
return new ResponseHeader(null, hdb); return new ResponseHeader(null, hdb);
@ -251,11 +298,7 @@ public final class Cache {
* @throws IOException * @throws IOException
*/ */
public static void delete(final byte[] hash) throws IOException { public static void delete(final byte[] hash) throws IOException {
if (responseHeaderDB instanceof MapHeap) { responseHeaderDB.delete(hash);
((MapHeap) responseHeaderDB).delete(hash);
} else {
responseHeaderDB.remove(hash);
}
fileDB.delete(hash); fileDB.delete(hash);
} }
} }

@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
@ -45,7 +46,7 @@ import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
public class Compressor implements BLOB { public class Compressor implements BLOB, Iterable<byte[]> {
static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content
static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding) static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding)
@ -61,18 +62,22 @@ public class Compressor implements BLOB {
initBuffer(); initBuffer();
} }
@Override
public long mem() { public long mem() {
return this.backend.mem(); return this.backend.mem();
} }
@Override
public void trim() { public void trim() {
this.backend.trim(); this.backend.trim();
} }
@Override
public String name() { public String name() {
return this.backend.name(); return this.backend.name();
} }
@Override
public synchronized void clear() throws IOException { public synchronized void clear() throws IOException {
initBuffer(); initBuffer();
this.backend.clear(); this.backend.clear();
@ -83,10 +88,12 @@ public class Compressor implements BLOB {
this.bufferlength = 0; this.bufferlength = 0;
} }
@Override
public ByteOrder ordering() { public ByteOrder ordering() {
return this.backend.ordering(); return this.backend.ordering();
} }
@Override
public synchronized void close(final boolean writeIDX) { public synchronized void close(final boolean writeIDX) {
// no more thread is running, flush all queues // no more thread is running, flush all queues
flushAll(); flushAll();
@ -164,6 +171,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException { public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
// depending on the source of the result, we additionally do entry compression // depending on the source of the result, we additionally do entry compression
// because if a document was read once, we think that it will not be retrieved another time again soon // because if a document was read once, we think that it will not be retrieved another time again soon
@ -186,6 +194,7 @@ public class Compressor implements BLOB {
return decompress(b); return decompress(b);
} }
@Override
public byte[] get(final Object key) { public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null; if (!(key instanceof byte[])) return null;
try { try {
@ -198,16 +207,19 @@ public class Compressor implements BLOB {
return null; return null;
} }
@Override
public boolean containsKey(final byte[] key) { public boolean containsKey(final byte[] key) {
synchronized (this) { synchronized (this) {
return this.buffer.containsKey(key) || this.backend.containsKey(key); return this.buffer.containsKey(key) || this.backend.containsKey(key);
} }
} }
@Override
public int keylength() { public int keylength() {
return this.backend.keylength(); return this.backend.keylength();
} }
@Override
public synchronized long length() { public synchronized long length() {
try { try {
return this.backend.length() + this.bufferlength; return this.backend.length() + this.bufferlength;
@ -217,6 +229,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public long length(final byte[] key) throws IOException { public long length(final byte[] key) throws IOException {
synchronized (this) { synchronized (this) {
byte[] b = this.buffer.get(key); byte[] b = this.buffer.get(key);
@ -238,6 +251,7 @@ public class Compressor implements BLOB {
return 0; return 0;
} }
@Override
public void insert(final byte[] key, final byte[] b) throws IOException { public void insert(final byte[] key, final byte[] b) throws IOException {
// first ensure that the files do not exist anywhere // first ensure that the files do not exist anywhere
@ -265,32 +279,47 @@ public class Compressor implements BLOB {
if (MemoryControl.shortStatus()) flushAll(); if (MemoryControl.shortStatus()) flushAll();
} }
@Override
public synchronized void delete(final byte[] key) throws IOException { public synchronized void delete(final byte[] key) throws IOException {
this.backend.delete(key); this.backend.delete(key);
final long rx = removeFromQueues(key); final long rx = removeFromQueues(key);
if (rx > 0) this.bufferlength -= rx; if (rx > 0) this.bufferlength -= rx;
} }
@Override
public synchronized int size() { public synchronized int size() {
return this.backend.size() + this.buffer.size(); return this.backend.size() + this.buffer.size();
} }
@Override
public synchronized boolean isEmpty() { public synchronized boolean isEmpty() {
if (!this.backend.isEmpty()) return false; if (!this.backend.isEmpty()) return false;
if (!this.buffer.isEmpty()) return false; if (!this.buffer.isEmpty()) return false;
return true; return true;
} }
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException { public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
flushAll(); flushAll();
return this.backend.keys(up, rotating); return this.backend.keys(up, rotating);
} }
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException { public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
flushAll(); flushAll();
return this.backend.keys(up, firstKey); return this.backend.keys(up, firstKey);
} }
@Override
public synchronized Iterator<byte[]> iterator() {
    // Iterable implementation: flush the write buffer first so the backend
    // key enumeration sees every stored entry. Synchronized to match the
    // locking discipline of the keys(...) methods, which also flush.
    flushAll();
    try {
        return this.backend.keys(true, false);
    } catch (IOException e) {
        // Callers consume this in for-each loops (e.g. the cache cleanup
        // job iterating the fileDB); returning null there would throw a
        // NullPointerException, so hand back an empty iterator instead.
        return java.util.Collections.<byte[]>emptyList().iterator();
    }
}
private boolean flushOne() { private boolean flushOne() {
if (this.buffer.isEmpty()) return false; if (this.buffer.isEmpty()) return false;
// depending on process case, write it to the file or compress it to the other queue // depending on process case, write it to the file or compress it to the other queue
@ -312,6 +341,7 @@ public class Compressor implements BLOB {
} }
} }
@Override
public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException { public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
final byte[] b = get(key); final byte[] b = get(key);
if (b == null) return 0; if (b == null) return 0;
@ -323,6 +353,7 @@ public class Compressor implements BLOB {
return reduction; return reduction;
} }
@Override
public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException { public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException {
final byte[] b = get(key); final byte[] b = get(key);
if (b == null) return 0; if (b == null) return 0;
@ -334,4 +365,5 @@ public class Compressor implements BLOB {
return reduction; return reduction;
} }
} }

@ -39,6 +39,8 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
@ -57,8 +59,8 @@ import net.yacy.kelondro.util.MemoryControl;
public class MapHeap implements Map<byte[], Map<String, String>> { public class MapHeap implements Map<byte[], Map<String, String>> {
private BLOB blob; private final BLOB blob;
private ARC<byte[], Map<String, String>> cache; private final ARC<byte[], Map<String, String>> cache;
private final char fillchar; private final char fillchar;
@ -445,11 +447,10 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
* close the Map table * close the Map table
*/ */
public synchronized void close() { public synchronized void close() {
this.cache = null; this.cache.clear();
// close file // close file
if (this.blob != null) this.blob.close(true); if (this.blob != null) this.blob.close(true);
this.blob = null;
} }
@Override @Override
@ -516,6 +517,29 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
return set; return set;
} }
// Sentinel marking the end of a key enumeration; consumers must compare it
// by reference identity (==/!=), not by content, so the charset of the
// backing bytes is irrelevant.
public final static byte[] POISON_QUEUE_ENTRY = "POISON".getBytes();

/**
 * Enumerate all keys of the underlying BLOB in a background thread and hand
 * them over through a bounded queue. The end of the enumeration is signalled
 * with the {@link #POISON_QUEUE_ENTRY} sentinel, which must be checked with
 * reference identity. IO errors during enumeration are handled best-effort:
 * whatever keys were read so far are delivered, then the sentinel follows.
 * @param size capacity of the hand-over queue (bounds memory usage)
 * @return a queue yielding every key, terminated by POISON_QUEUE_ENTRY
 */
public BlockingQueue<byte[]> keyQueue(final int size) {
    final ArrayBlockingQueue<byte[]> queue = new ArrayBlockingQueue<byte[]>(size);
    new Thread("MapHeap.keyQueue") {
        @Override
        public void run() {
            try {
                final Iterator<byte[]> i = MapHeap.this.blob.keys(true, false);
                while (i.hasNext()) {
                    try {
                        queue.put(i.next());
                    } catch (final InterruptedException e) {
                        // restore interrupt status, stop producing
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            } catch (final IOException e) {
                // best effort: fall through and terminate the stream
            }
            try {
                // always deliver the sentinel so consumers never block forever
                queue.put(MapHeap.POISON_QUEUE_ENTRY);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }.start();
    return queue;
}
@Override @Override
public Collection<Map<String, String>> values() { public Collection<Map<String, String>> values() {
// this method shall not be used because it is not appropriate for this kind of data // this method shall not be used because it is not appropriate for this kind of data

Loading…
Cancel
Save