added automatic cleaning of the cache if the metadata and file database
sizes are not equal. The two can diverge because one of the caches is
cleaned after a while or when it grows too big, while the metadata was
left behind. The metadata is now wiped by a checkup process at every
application start. This should reduce memory usage a bit.
pull/1/head
Michael Peter Christen 13 years ago
parent d0ec8018f5
commit 3dd8376825
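In outline, the checkup described in the commit message compares the sizes of the two stores at start-up and, when they disagree, reconciles them on a background thread. A simplified sketch of the control flow (the full version is in the Cache.java hunks below):

// simplified outline of the start-up checkup; see the Cache.java hunks below
if (responseHeaderDB.size() != fileDB.size()) {
    new Thread() {
        @Override
        public void run() {
            // pass 1: delete metadata entries that have no cached file
            // pass 2: delete cached files that have no metadata entry
        }
    }.start();
}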

@@ -1,4 +1,4 @@
-// ConfigHTCache_p.java
+// ConfigHTCache_p.java
// ---------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
@@ -32,7 +32,6 @@ import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects;
@@ -59,12 +58,12 @@ public class ConfigHTCache_p {
cache.mkdirs();
}
-// proxyCacheSize
+// proxyCacheSize
final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 4);
env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize);
-Cache.setMaxCacheSize(newProxyCacheSize * 1024 * 1024);
+Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L);
}
if (post != null && post.containsKey("deletecomplete")) {
if ("on".equals(post.get("deleteCache", ""))) {
Cache.clear();
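The widening to long in the hunk above is more than cosmetic: the cache size is configured in megabytes, and with plain int arithmetic the product newProxyCacheSize * 1024 * 1024 overflows as soon as the configured size reaches 2048 MB, before it is ever widened for setMaxCacheSize. A minimal illustration:

// int arithmetic overflows for cache sizes >= 2048 MB
int maxCacheSizeMB = 3000;                    // a 3 GB cache, configured in MB
long wrong = maxCacheSizeMB * 1024 * 1024;    // overflows in int first: -1149239296
long right = maxCacheSizeMB * 1024L * 1024L;  // computed in long: 3145728000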

@@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ResponseHeader;
@@ -47,6 +48,7 @@ import net.yacy.kelondro.blob.Compressor;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
@@ -57,7 +59,7 @@ public final class Cache {
private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap";
private static final String FILE_DB_NAME = "file.array";
-private static Map<byte[], Map<String, String>> responseHeaderDB = null;
+private static MapHeap responseHeaderDB = null;
private static Compressor fileDB = null;
private static ArrayStack fileDBunbuffered = null;
@@ -84,6 +86,7 @@ public final class Cache {
} catch (final IOException e) {
Log.logException(e);
}
// open the cache file
try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, 1024 * 1024 * 2, false);
fileDBunbuffered.setMaxSize(maxCacheSize);
@@ -91,6 +94,53 @@ public final class Cache {
} catch (final IOException e) {
Log.logException(e);
}
Log.logInfo("Cache", "initialized cache database responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
// clean up the responseHeaderDB which cannot be cleaned the same way as the cache files.
// We do this silently, as a concurrent job, only once after start-up
if (responseHeaderDB.size() != fileDB.size()) {
Log.logWarning("Cache", "file and metadata size is not equal, starting a cleanup thread...");
Thread startupCleanup = new Thread() {
@Override
public void run() {
// enumerate the responseHeaderDB and find all entries that are not inside the fileDB
BlockingQueue<byte[]> q = responseHeaderDB.keyQueue(1000);
final HandleSet delkeys = new HandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 1);
Log.logInfo("Cache", "started cleanup thread to remove unused cache metadata");
try {
byte[] k;
while (((k = q.take()) != MapHeap.POISON_QUEUE_ENTRY)) {
if (!fileDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
}
} catch (InterruptedException e) {
} finally {
// delete the collected keys from the metadata
Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused metadata entries; now deleting them from the file...");
for (byte[] k: delkeys) {
try {
responseHeaderDB.delete(k);
} catch (IOException e) {
}
}
}
Log.logInfo("Cache", "running check to remove unused file cache data");
delkeys.clear();
for (byte[] k: fileDB) {
if (!responseHeaderDB.containsKey(k)) try { delkeys.put(k); } catch (RowSpaceExceededException e) { break; }
}
Log.logInfo("Cache", "cleanup thread collected " + delkeys.size() + " unused cache entries; now deleting them from the file...");
for (byte[] k: delkeys) {
try {
fileDB.delete(k);
} catch (IOException e) {
}
}
Log.logInfo("Cache", "terminated cleanup thread; responseHeaderDB.size() = " + responseHeaderDB.size() + ", fileDB.size() = " + fileDB.size());
}
};
startupCleanup.start();
}
}
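The cleanup thread above is essentially a two-sided set difference between the metadata keys and the file keys; in each pass the orphaned keys are collected first and deleted afterwards, so neither store is modified while it is being enumerated. A minimal sketch of the same idea with plain Java collections (reconcile and its parameters are illustrative, not part of the patch):

import java.util.HashSet;
import java.util.Set;

// drop from each store the keys that the other store does not contain
static <K> void reconcile(final Set<K> metadataKeys, final Set<K> fileKeys) {
    final Set<K> orphanedMeta = new HashSet<K>(metadataKeys);
    orphanedMeta.removeAll(fileKeys);        // metadata without a cached file
    metadataKeys.removeAll(orphanedMeta);

    final Set<K> orphanedFiles = new HashSet<K>(fileKeys);
    orphanedFiles.removeAll(metadataKeys);   // cached files without metadata
    fileKeys.removeAll(orphanedFiles);
}

The real code cannot use a HashSet because the keys are byte[] arrays, which have no value-based equals/hashCode; that is why the collected keys go into a HandleSet ordered by Base64Order.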
/**
@@ -131,9 +181,7 @@ public final class Cache {
* close the databases
*/
public static void close() {
-if (responseHeaderDB instanceof MapHeap) {
-((MapHeap) responseHeaderDB).close();
-}
+responseHeaderDB.close();
fileDB.close(true);
}
@@ -156,12 +204,9 @@ public final class Cache {
hm.putAll(responseHeader);
hm.put("@@URL", url.toNormalform(true, false));
try {
-if (responseHeaderDB instanceof MapHeap) {
-((MapHeap) responseHeaderDB).insert(url.hash(), hm);
-} else {
-responseHeaderDB.put(url.hash(), hm);
-}
+responseHeaderDB.insert(url.hash(), hm);
} catch (final Exception e) {
fileDB.delete(url.hash());
throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
}
if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
@@ -184,11 +229,7 @@ public final class Cache {
// if only one of the two is present, we do a clean-up
if (headerExists) try {
log.logWarning("header but not content of urlhash " + ASCII.String(urlhash) + " in cache; cleaned up");
-if (responseHeaderDB instanceof MapHeap) {
-((MapHeap) responseHeaderDB).delete(urlhash);
-} else {
-responseHeaderDB.remove(urlhash);
-}
+responseHeaderDB.delete(urlhash);
} catch (final IOException e) {}
if (fileExists) try {
//log.logWarning("content but not header of url " + url.toString() + " in cache; cleaned up");
@@ -209,8 +250,14 @@ public final class Cache {
public static ResponseHeader getResponseHeader(final byte[] hash) {
// loading data from database
-Map<String, String> hdb;
-hdb = responseHeaderDB.get(hash);
+Map<String, String> hdb = null;
+try {
+hdb = responseHeaderDB.get(hash);
+} catch (IOException e) {
+return null;
+} catch (RowSpaceExceededException e) {
+return null;
+}
if (hdb == null) return null;
return new ResponseHeader(null, hdb);
@@ -251,11 +298,7 @@ public final class Cache {
* @throws IOException
*/
public static void delete(final byte[] hash) throws IOException {
-if (responseHeaderDB instanceof MapHeap) {
-((MapHeap) responseHeaderDB).delete(hash);
-} else {
-responseHeaderDB.remove(hash);
-}
+responseHeaderDB.delete(hash);
fileDB.delete(hash);
}
}
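With the change above, getResponseHeader no longer propagates read errors: a failed lookup and a plain cache miss both come back as null, so callers need exactly one check. A hedged usage sketch (urlhash stands for any valid URL hash):

final ResponseHeader header = Cache.getResponseHeader(urlhash);
if (header == null) {
    // cache miss or unreadable metadata; treat both as "not cached"
}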

@@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.zip.GZIPInputStream;
@@ -45,7 +46,7 @@ import net.yacy.kelondro.util.ByteArray;
import net.yacy.kelondro.util.MemoryControl;
-public class Compressor implements BLOB {
+public class Compressor implements BLOB, Iterable<byte[]> {
static byte[] gzipMagic = {(byte) 'z', (byte) '|'}; // magic for gzip-encoded content
static byte[] plainMagic = {(byte) 'p', (byte) '|'}; // magic for plain content (no encoding)
@@ -61,18 +62,22 @@ public class Compressor implements BLOB {
initBuffer();
}
@Override
public long mem() {
return this.backend.mem();
}
@Override
public void trim() {
this.backend.trim();
}
@Override
public String name() {
return this.backend.name();
}
@Override
public synchronized void clear() throws IOException {
initBuffer();
this.backend.clear();
@@ -83,10 +88,12 @@ public class Compressor implements BLOB {
this.bufferlength = 0;
}
@Override
public ByteOrder ordering() {
return this.backend.ordering();
}
@Override
public synchronized void close(final boolean writeIDX) {
// no more thread is running, flush all queues
flushAll();
@@ -164,6 +171,7 @@ public class Compressor implements BLOB {
}
}
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
// depending on the source of the result, we additionally do entry compression
// because if a document was read once, we assume it will not be retrieved again soon
@@ -186,6 +194,7 @@ public class Compressor implements BLOB {
return decompress(b);
}
@Override
public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null;
try {
@@ -198,16 +207,19 @@ public class Compressor implements BLOB {
return null;
}
@Override
public boolean containsKey(final byte[] key) {
synchronized (this) {
return this.buffer.containsKey(key) || this.backend.containsKey(key);
}
}
@Override
public int keylength() {
return this.backend.keylength();
}
@Override
public synchronized long length() {
try {
return this.backend.length() + this.bufferlength;
@@ -217,6 +229,7 @@ public class Compressor implements BLOB {
}
}
@Override
public long length(final byte[] key) throws IOException {
synchronized (this) {
byte[] b = this.buffer.get(key);
@@ -238,6 +251,7 @@ public class Compressor implements BLOB {
return 0;
}
@Override
public void insert(final byte[] key, final byte[] b) throws IOException {
// first ensure that the files do not exist anywhere
@@ -265,32 +279,47 @@ public class Compressor implements BLOB {
if (MemoryControl.shortStatus()) flushAll();
}
@Override
public synchronized void delete(final byte[] key) throws IOException {
this.backend.delete(key);
final long rx = removeFromQueues(key);
if (rx > 0) this.bufferlength -= rx;
}
@Override
public synchronized int size() {
return this.backend.size() + this.buffer.size();
}
@Override
public synchronized boolean isEmpty() {
if (!this.backend.isEmpty()) return false;
if (!this.buffer.isEmpty()) return false;
return true;
}
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
flushAll();
return this.backend.keys(up, rotating);
}
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
flushAll();
return this.backend.keys(up, firstKey);
}
@Override
public Iterator<byte[]> iterator() {
flushAll();
try {
return this.backend.keys(true, false);
} catch (IOException e) {
return null;
}
}
private boolean flushOne() {
if (this.buffer.isEmpty()) return false;
// depending on process case, write it to the file or compress it to the other queue
@@ -312,6 +341,7 @@ public class Compressor implements BLOB {
}
}
@Override
public int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
final byte[] b = get(key);
if (b == null) return 0;
@@ -323,6 +353,7 @@ public class Compressor implements BLOB {
return reduction;
}
@Override
public int reduce(final byte[] key, final Reducer reducer) throws IOException, RowSpaceExceededException {
final byte[] b = get(key);
if (b == null) return 0;
@@ -334,4 +365,5 @@ public class Compressor implements BLOB {
return reduction;
}
}
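Because Compressor now implements Iterable<byte[]>, a key scan over the file cache becomes a plain for-each loop, which is exactly how the start-up cleanup in Cache.java walks fileDB; note that iterator() calls flushAll() first, so the scan also sees entries that were still sitting in the write buffer:

// scan all keys of the (flushed) file cache
for (final byte[] key : fileDB) {
    if (!responseHeaderDB.containsKey(key)) {
        // orphaned entry: cached content without metadata
    }
}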

@@ -39,6 +39,8 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter;
@@ -57,8 +59,8 @@ import net.yacy.kelondro.util.MemoryControl;
public class MapHeap implements Map<byte[], Map<String, String>> {
-private BLOB blob;
-private ARC<byte[], Map<String, String>> cache;
+private final BLOB blob;
+private final ARC<byte[], Map<String, String>> cache;
private final char fillchar;
@@ -445,11 +447,10 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
* close the Map table
*/
public synchronized void close() {
-this.cache = null;
+this.cache.clear();
// close file
if (this.blob != null) this.blob.close(true);
-this.blob = null;
}
@Override
@@ -516,6 +517,29 @@ public class MapHeap implements Map<byte[], Map<String, String>> {
return set;
}
public final static byte[] POISON_QUEUE_ENTRY = "POISON".getBytes();
public BlockingQueue<byte[]> keyQueue(final int size) {
final ArrayBlockingQueue<byte[]> set = new ArrayBlockingQueue<byte[]>(size);
(new Thread() {
@Override
public void run() {
try {
final Iterator<byte[]> i = MapHeap.this.blob.keys(true, false);
while (i.hasNext())
try {
set.put(i.next());
} catch (InterruptedException e) {
break;
}
} catch (final IOException e) {}
try {
set.put(MapHeap.POISON_QUEUE_ENTRY);
} catch (InterruptedException e) {
}
}}).start();
return set;
}
@Override
public Collection<Map<String, String>> values() {
// this method shall not be used because it is not appropriate for this kind of data
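keyQueue streams the heap's keys to another thread through a bounded queue and terminates the stream with the shared POISON_QUEUE_ENTRY array; because the consumer receives that same array instance, a reference comparison is enough to detect the end of the stream. A minimal consumer, mirroring the cleanup thread in Cache.java (heap stands for any open MapHeap):

final BlockingQueue<byte[]> queue = heap.keyQueue(1000);
byte[] key;
try {
    while ((key = queue.take()) != MapHeap.POISON_QUEUE_ENTRY) {
        // handle one key; take() blocks until the producer catches up
    }
} catch (final InterruptedException e) {
    Thread.currentThread().interrupt(); // give up cleanly if interrupted
}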
