diff --git a/defaults/yacy.init b/defaults/yacy.init index f62f0971e..a9dc0abfc 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -254,6 +254,14 @@ proxyCache = DATA/HTCACHE # default: 4 Gigabyte proxyCacheSize = 4096 +# The compression level for cached content +# Supported values ranging from 0 - no compression (lower CPU, higher disk usage), to 9 - best compression (higher CPU, lower disk use) +proxyCache.compressionLevel = 9 + +# Timeout value (in milliseconds) for acquiring a synchronization lock on getContent/store Cache operations +# When timeout occurs, loader should fall back to regular remote resource loading +proxyCache.sync.lockTimeout = 2000 + # you can use the proxy with fresh/stale rules or in a always-fresh mode proxyAlwaysFresh = false diff --git a/htroot/ConfigHTCache_p.html b/htroot/ConfigHTCache_p.html index 2354a52f9..48bc2eb9a 100644 --- a/htroot/ConfigHTCache_p.html +++ b/htroot/ConfigHTCache_p.html @@ -1,5 +1,5 @@ - - + + YaCy '#[clientname]#': Hypertext Cache Configuration #%env/templates/metas.template%# @@ -18,10 +18,24 @@
-
+
#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average
MB
+
+
+
+ + Concurrent access timeout info + The maximum time to wait for acquiring a synchronization lock on concurrent get/store cache operations. + Beyond this limit, the crawler or proxy falls back to regular remote resource loading. + +
+
milliseconds
 
diff --git a/htroot/ConfigHTCache_p.java b/htroot/ConfigHTCache_p.java index 6005e1d2a..88baf9c95 100644 --- a/htroot/ConfigHTCache_p.java +++ b/htroot/ConfigHTCache_p.java @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; +import java.util.zip.Deflater; import net.yacy.cora.protocol.RequestHeader; import net.yacy.crawler.data.Cache; @@ -61,6 +62,19 @@ public class ConfigHTCache_p { final int newProxyCacheSize = Math.max(post.getInt("maxCacheSize", 64), 0); env.setConfig(SwitchboardConstants.PROXY_CACHE_SIZE, newProxyCacheSize); Cache.setMaxCacheSize(newProxyCacheSize * 1024L * 1024L); + + /* Compression level*/ + /* Ensure a value within the range supported by the Deflater class */ + final int newCompressionLevel = Math.max(Deflater.NO_COMPRESSION, Math.min(Deflater.BEST_COMPRESSION, + post.getInt("compressionLevel", SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL_DEFAULT))); + env.setConfig(SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL, newCompressionLevel); + Cache.setCompressionLevel(newCompressionLevel); + + /* Synchronization lock timeout */ + final long newLockTimeout = Math.max(10, Math.min(60000, + post.getLong("lockTimeout", SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT_DEFAULT))); + env.setConfig(SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT, newLockTimeout); + Cache.setLockTimeout(newLockTimeout); } if (post != null && post.containsKey("deletecomplete")) { @@ -73,6 +87,32 @@ public class ConfigHTCache_p { } prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT)); + + /* Compression levels */ + final int configuredCompressionLevel = env.getConfigInt(SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL, + SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL_DEFAULT); + int levelsCount = 0; + for(int level = Deflater.NO_COMPRESSION; level <= Deflater.BEST_COMPRESSION; level++) { + if(level == configuredCompressionLevel) { + prop.put("compressionLevels_" + levelsCount + "_selected", "1"); + 
} else { + prop.put("compressionLevels_" + levelsCount + "_selected", "0"); + } + prop.put("compressionLevels_" + levelsCount + "_value", level); + prop.put("compressionLevels_" + levelsCount + "_name", level); + if(level == Deflater.NO_COMPRESSION) { + prop.put("compressionLevels_" + levelsCount + "_name", "0 - No compression"); + } else if(level == Deflater.BEST_SPEED) { + prop.put("compressionLevels_" + levelsCount + "_name", Deflater.BEST_SPEED + " - Best speed"); + } else if(level == Deflater.BEST_COMPRESSION) { + prop.put("compressionLevels_" + levelsCount + "_name", Deflater.BEST_COMPRESSION + " - Best compression"); + } + levelsCount++; + } + prop.put("compressionLevels", levelsCount); + + prop.put("lockTimeout", env.getConfigLong(SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT, + SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT_DEFAULT)); prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024); prop.put("actualCacheDocCount", Cache.getActualCacheDocCount()); prop.put("docSizeAverage", Cache.getActualCacheDocCount() == 0 ? 
0 : Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024); diff --git a/source/net/yacy/crawler/data/Cache.java b/source/net/yacy/crawler/data/Cache.java index 96a97872f..bc5189e4f 100644 --- a/source/net/yacy/crawler/data/Cache.java +++ b/source/net/yacy/crawler/data/Cache.java @@ -84,8 +84,9 @@ public final class Cache { * @param peerSalt peer identifier * @param cacheSizeMax maximum cache size in bytes * @param lockTimeout maximum time (in milliseconds) to acquire a synchronization lock on store() and getContent() + * @param compressionLevel the compression level : supported values ranging from 0 - no compression, to 9 - best compression */ - public static void init(final File htCachePath, final String peerSalt, final long cacheSizeMax, final long lockTimeout) { + public static void init(final File htCachePath, final String peerSalt, final long cacheSizeMax, final long lockTimeout, final int compressionLevel) { cachePath = htCachePath; maxCacheSize = cacheSizeMax; @@ -116,7 +117,7 @@ public final class Cache { try { fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, DEFAULT_BACKEND_BUFFER_SIZE, false, true); fileDBunbuffered.setMaxSize(maxCacheSize); - fileDB = new Compressor(fileDBunbuffered, DEFAULT_COMPRESSOR_BUFFER_SIZE, lockTimeout); + fileDB = new Compressor(fileDBunbuffered, DEFAULT_COMPRESSOR_BUFFER_SIZE, lockTimeout, compressionLevel); } catch (final IOException e) { ConcurrentLog.logException(e); // try a healing @@ -125,7 +126,7 @@ public final class Cache { try { fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, Base64Order.enhancedCoder, 12, DEFAULT_BACKEND_BUFFER_SIZE, false, true); fileDBunbuffered.setMaxSize(maxCacheSize); - fileDB = new Compressor(fileDBunbuffered, DEFAULT_COMPRESSOR_BUFFER_SIZE, lockTimeout); + fileDB = new Compressor(fileDBunbuffered, DEFAULT_COMPRESSOR_BUFFER_SIZE, lockTimeout, compressionLevel); } catch (final IOException ee) { 
ConcurrentLog.logException(e); } @@ -226,6 +227,22 @@ public final class Cache { public static long getActualCacheDocCount() { return fileDBunbuffered.size(); } + + /** + * Set the new content compression level + * @param newCompressionLevel the new compression level. Supported values between 0 (no compression) and 9 (best compression) + */ + public static void setCompressionLevel(final int newCompressionLevel) { + fileDB.setCompressionLevel(newCompressionLevel); + } + + /** + * Set the new synchronization lock timeout. + * @param lockTimeout the new synchronization lock timeout (in milliseconds). + */ + public static void setLockTimeout(final long lockTimeout) { + fileDB.setLockTimeout(lockTimeout); + } /** * close the databases diff --git a/source/net/yacy/kelondro/blob/Compressor.java b/source/net/yacy/kelondro/blob/Compressor.java index 1bbb2c233..a78cb3cd1 100644 --- a/source/net/yacy/kelondro/blob/Compressor.java +++ b/source/net/yacy/kelondro/blob/Compressor.java @@ -61,19 +61,32 @@ public class Compressor implements BLOB, Iterable { /** Total size (in bytes) of uncompressed entries in buffer */ private long bufferlength; + + /** Maximum {@link #bufferlength} value before compressing and flushing to the backend */ private final long maxbufferlength; /** Maximum time (in milliseconds) to acquire a synchronization lock on get() and insert() */ - private final long lockTimeout; + private long lockTimeout; /** Synchronization lock */ private final ReentrantLock lock; - - public Compressor(final BLOB backend, final long buffersize, final long lockTimeout) { + + /** The compression level */ + private int compressionLevel; + + /** + * @param backend the backend storage + * @param buffersize the maximum total size (in bytes) of uncompressed in-memory entries before compressing and flushing to the backend + * @param lockTimeout maximum time to acquire a synchronization lock on get() and insert() operations + * @param compressionLevel the compression level : supported 
values ranging from 0 - no compression, to 9 - best compression + */ + public Compressor(final BLOB backend, final long buffersize, final long lockTimeout, final int compressionLevel) { this.backend = backend; this.maxbufferlength = buffersize; this.lockTimeout = lockTimeout; this.lock = new ReentrantLock(); + /* Ensure a value within the range supported by the Deflater class */ + this.compressionLevel = Math.max(Deflater.NO_COMPRESSION, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); initBuffer(); } @@ -125,21 +138,21 @@ public class Compressor implements BLOB, Iterable { } } - private static byte[] compress(final byte[] b) { + private static byte[] compress(final byte[] b, final int compressionLevel) { final int l = b.length; if (l < 100) return markWithPlainMagic(b); - final byte[] bb = compressAddMagic(b); + final byte[] bb = compressAddMagic(b, compressionLevel); if (bb.length >= l) return markWithPlainMagic(b); return bb; } - private static byte[] compressAddMagic(final byte[] b) { + private static byte[] compressAddMagic(final byte[] b, final int compressionLevel) { // compress a byte array and add a leading magic for the compression try { //System.out.print("/(" + cdr + ")"); // DEBUG final ByteArrayOutputStream baos = new ByteArrayOutputStream(b.length / 5); baos.write(gzipMagic); - final OutputStream os = new GZIPOutputStream(baos, 65536){{def.setLevel(Deflater.BEST_COMPRESSION);}}; + final OutputStream os = new GZIPOutputStream(baos, 65536){{def.setLevel(compressionLevel);}}; os.write(b); os.close(); baos.close(); @@ -213,7 +226,7 @@ public class Compressor implements BLOB, Iterable { b = this.buffer.remove(key); if (b != null) { this.bufferlength = this.bufferlength - b.length; - this.backend.insert(key, compress(b)); + this.backend.insert(key, compress(b, this.compressionLevel)); return b; } } finally { @@ -412,7 +425,7 @@ public class Compressor implements BLOB, Iterable { final Map.Entry entry = this.buffer.entrySet().iterator().next(); 
this.buffer.remove(entry.getKey()); try { - this.backend.insert(entry.getKey(), compress(entry.getValue())); + this.backend.insert(entry.getKey(), compress(entry.getValue(), this.compressionLevel)); this.bufferlength -= entry.getValue().length; return true; } catch (final IOException e) { @@ -457,6 +470,23 @@ public class Compressor implements BLOB, Iterable { insert(key, c); return reduction; } + + /** + * Set the new content compression level. + * @param compressionLevel the new compression level. Supported values between 0 (no compression) and 9 (best compression). + */ + public void setCompressionLevel(final int compressionLevel) { + /* Ensure a value within the range supported by the Deflater class */ + this.compressionLevel = Math.max(Deflater.NO_COMPRESSION, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); + } + + /** + * Set the new synchronization lock timeout. + * @param lockTimeout the new synchronization lock timeout (in milliseconds). + */ + public void setLockTimeout(final long lockTimeout) { + this.lockTimeout = lockTimeout; + } } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 1ae05c993..cd7e4850e 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -723,7 +723,11 @@ public final class Switchboard extends serverSwitch { this.log.info("HTCACHE Path = " + this.htCachePath.getAbsolutePath()); final long maxCacheSize = 1024L * 1024L * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte - Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize, 2000); + Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize, + getConfigLong(SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT, + SwitchboardConstants.HTCACHE_SYNC_LOCK_TIMEOUT_DEFAULT), + getConfigInt(SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL, + SwitchboardConstants.HTCACHE_COMPRESSION_LEVEL_DEFAULT)); final File transactiondir = new 
File(this.htCachePath, "snapshots"); Transactions.init(transactiondir); diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index ff69a6abe..098403711 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -26,6 +26,8 @@ package net.yacy.search; +import java.util.zip.Deflater; + import net.yacy.cora.order.Digest; import net.yacy.server.http.RobotsTxtConfig; @@ -390,6 +392,19 @@ public final class SwitchboardConstants { */ public static final String HTCACHE_PATH = "proxyCache"; public static final String HTCACHE_PATH_DEFAULT = "DATA/HTCACHE"; + + /** Key of the setting configuring the compression level for cached content */ + public static final String HTCACHE_COMPRESSION_LEVEL = "proxyCache.compressionLevel"; + + /** Default compression level for cached content */ + public static final int HTCACHE_COMPRESSION_LEVEL_DEFAULT = Deflater.BEST_COMPRESSION; + + /** Key of the setting configuring Cache synchronization lock timeout on getContent/store operations */ + public static final String HTCACHE_SYNC_LOCK_TIMEOUT = "proxyCache.sync.lockTimeout"; + + /** Default timeout value (in milliseconds) for acquiring a synchronization lock on getContent/store Cache operations */ + public static final long HTCACHE_SYNC_LOCK_TIMEOUT_DEFAULT = 2000; + public static final String RELEASE_PATH = "releases"; public static final String RELEASE_PATH_DEFAULT = "DATA/RELEASE"; diff --git a/test/java/net/yacy/crawler/data/CacheTest.java b/test/java/net/yacy/crawler/data/CacheTest.java index bb71b5e84..8c8fe5d96 100644 --- a/test/java/net/yacy/crawler/data/CacheTest.java +++ b/test/java/net/yacy/crawler/data/CacheTest.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Random; import java.util.concurrent.TimeUnit; import java.util.logging.LogManager; +import java.util.zip.Deflater; import org.apache.http.HttpStatus; import org.junit.After; @@ -68,7 +69,7 @@ public class 
CacheTest { Cache.init(new File(System.getProperty("java.io.tmpdir") + File.separator + "testCache"), "peerSalt", Math.max(Math.max(TEXT_CONTENT.getBytes(StandardCharsets.UTF_8).length * 10, Cache.DEFAULT_COMPRESSOR_BUFFER_SIZE * 2), Cache.DEFAULT_BACKEND_BUFFER_SIZE * 2), - 2000); + 2000, Deflater.BEST_COMPRESSION); Cache.clear(); } @@ -497,9 +498,11 @@ public class CacheTest { final long sleepTime = 0; /* Maximum waiting time (in ms) for acquiring a synchronization lock */ final long lockTimeout = 2000; + /* The backend compression level */ + final int compressionLevel = Deflater.BEST_COMPRESSION; Cache.init(new File(System.getProperty("java.io.tmpdir") + File.separator + "yacyTestCache"), "peerSalt", - cacheMaxSize, lockTimeout); + cacheMaxSize, lockTimeout, compressionLevel); Cache.clear(); System.out.println("Cache initialized with a maximum size of " + cacheMaxSize + " bytes.");