From 9eb668e95118267837f74ee6f0c861c2b6cf369c Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 12 Feb 2014 01:00:44 +0100 Subject: [PATCH] enhanced the resource observer The resource observer is now able to recognize free disk space AND available space for YaCy. The amount of space which is assigned for YaCy is defined in new settings in the configuration file. Furthermore, there is now a cleanup process which deletes files in case that an autodelete is activated. The autodelete is now BY DEFAULT ON if the disk space is low, which means that YaCy starts to delete documents when the disk is full! --- defaults/yacy.init | 22 ++ htroot/PerformanceQueues_p.java | 8 +- htroot/Status.java | 6 +- htroot/Status_p.inc | 2 +- source/net/yacy/kelondro/rwi/IndexCell.java | 37 +++ .../yacy/kelondro/rwi/ReferenceContainer.java | 1 + .../kelondro/rwi/ReferenceContainerArray.java | 4 + .../kelondro/rwi/ReferenceContainerCache.java | 4 + source/net/yacy/search/ResourceObserver.java | 261 +++++++++++++----- .../net/yacy/search/SwitchboardConstants.java | 14 +- source/net/yacy/search/index/Fulltext.java | 12 + 11 files changed, 298 insertions(+), 73 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index f3c75cfee..e073a959f 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1056,6 +1056,28 @@ disk.free = 3000 # minimum for DHT disk.free.hardlimit = 1000 +# ResourceObserver settings +# We apply the naming of control circuit states to resources observer limit values (steady-state value, over/undershot) +# under/overshot states in the system are supposed to be regulated to match the steady-state value + +# autoregulation of resource states +# ATTENTION: be aware that using the autoregulate-option causes the search index data to be DELETED as soon as threshold-values are reached! 
+# the autoregulate function starts working if resources reach over/undershot values and the auto-regulation tries to regulate to the steadystate value +resource.disk.free.autoregulate=true +resource.disk.used.autoregulate=false + +# the target steady-state of minimum disk space left (MB) +resource.disk.free.min.steadystate=4096 + +# the undershot below the steady-state of minimum disk free as absolute size (MB) +resource.disk.free.min.undershot=2048 + +# the target steady-state of maximum disk space for YaCy (MB) +resource.disk.used.max.steadystate=524288 + +# the overshot above the steady-state of disk space for YaCy (absolute) (MB) +resource.disk.used.max.overshot=1048576 + # minimum memory to accept dht-in (MiB) memory.acceptDHTabove = 50 memory.disabledDHT = false diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index e0bf584ac..206bd9e3d 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -83,10 +83,10 @@ public class PerformanceQueues_p { prop.put("setStartupCommit", "1"); } if(post.containsKey("diskFree")) { - sb.setConfig(SwitchboardConstants.DISK_FREE, post.getInt("diskFree", 3000)); + sb.setConfig(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_STEADYSTATE, post.getInt("diskFree", 3000)); } if(post.containsKey("diskFreeHardlimit")) { - sb.setConfig(SwitchboardConstants.DISK_FREE_HARDLIMIT, post.getInt("diskFreeHardlimit", 1000)); + sb.setConfig(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_UNDERSHOT, post.getInt("diskFreeHardlimit", 1000)); } if(post.containsKey("memoryAcceptDHT")) { sb.setConfig(SwitchboardConstants.MEMORY_ACCEPTDHT, post.getInt("memoryAcceptDHT", 50)); @@ -325,8 +325,8 @@ public class PerformanceQueues_p { final String Xms = sb.getConfig("javastart_Xms", "Xms600m").substring(3); prop.put("Xms", Xms.substring(0, Xms.length() - 1)); - final long diskFree = sb.getConfigLong(SwitchboardConstants.DISK_FREE, 3000L); - final long diskFreeHardlimit = 
sb.getConfigLong(SwitchboardConstants.DISK_FREE_HARDLIMIT, 1000L); + final long diskFree = sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_STEADYSTATE, 3000L); + final long diskFreeHardlimit = sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_UNDERSHOT, 1000L); final long memoryAcceptDHT = sb.getConfigLong(SwitchboardConstants.MEMORY_ACCEPTDHT, 50000L); final boolean observerTrigger = !MemoryControl.properState(); prop.put("diskFree", diskFree); diff --git a/htroot/Status.java b/htroot/Status.java index 5ff0dbaf8..6ffdb95c3 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -123,7 +123,7 @@ public class Status final boolean adminaccess = sb.adminAuthenticated(header) >= 2; if ( adminaccess ) { prop.put("showPrivateTable", "1"); - prop.put("privateStatusTable", "status_p.inc"); + prop.put("privateStatusTable", "Status_p.inc"); } else { prop.put("showPrivateTable", "0"); prop.put("privateStatusTable", ""); @@ -145,7 +145,7 @@ public class Status // resource observer status if ( adminaccess ) { if ( !sb.observer.getDiskAvailable() ) { - final String minFree = Formatter.bytesToString(sb.observer.getMinFreeDiskSpace()); + final String minFree = Formatter.bytesToString(sb.observer.getMinFreeDiskSteadystate()); prop.put("warningDiskSpaceLow", "1"); prop.put("warningDiskSpaceLow_minSpace", minFree); } @@ -320,7 +320,7 @@ public class Status // memory usage and system attributes prop.put("usedMemory", Formatter.bytesToString(MemoryControl.total())); prop.put("maxMemory", Formatter.bytesToString(MemoryControl.maxMemory())); - prop.put("usedDisk", Formatter.bytesToString(sb.observer.getSizeOfDataPath())); + prop.put("usedDisk", Formatter.bytesToString(sb.observer.getSizeOfDataPath(true))); prop.put("freeDisk", Formatter.bytesToString(sb.observer.getUsableSpace())); prop.put("processors", WorkflowProcessor.availableCPU); prop.put("load", Memory.load()); diff --git a/htroot/Status_p.inc b/htroot/Status_p.inc index 87c2082f4..944188744 100644 --- 
a/htroot/Status_p.inc +++ b/htroot/Status_p.inc @@ -70,7 +70,7 @@
- +
RAM used:#[usedMemory]#
RAM max:#[maxMemory]#
DISK used:#[usedDisk]#
DISK used:(approx.) #[usedDisk]#
DISK free:#[freeDisk]#
diff --git a/source/net/yacy/kelondro/rwi/IndexCell.java b/source/net/yacy/kelondro/rwi/IndexCell.java index 2a452a1fd..fa9651d24 100644 --- a/source/net/yacy/kelondro/rwi/IndexCell.java +++ b/source/net/yacy/kelondro/rwi/IndexCell.java @@ -28,6 +28,8 @@ package net.yacy.kelondro.rwi; import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; @@ -38,6 +40,7 @@ import net.yacy.cora.order.Order; import net.yacy.cora.sorting.Rating; import net.yacy.cora.storage.ComparableARC; import net.yacy.cora.storage.HandleSet; +import net.yacy.cora.util.ByteArray; import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.SpaceExceededException; import net.yacy.kelondro.data.word.Word; @@ -210,6 +213,40 @@ public final class IndexCell extends AbstractBu return donesomething; } + public int deleteOld(int minsize, long maxtime) throws IOException { + long timeout = System.currentTimeMillis() + maxtime; + Collection keys = keys4LargeReferences(minsize, maxtime / 3); + int c = 0; + int oldShrinkMaxsize = ReferenceContainer.maxReferences; + ReferenceContainer.maxReferences = minsize; + for (byte[] key: keys) { + ReferenceContainer container = this.get(key, null); + container.shrinkReferences(); + try {this.add(container); c++;} catch (SpaceExceededException e) {} + if (System.currentTimeMillis() > timeout) break; + } + ReferenceContainer.maxReferences = oldShrinkMaxsize; + return c; + } + + private Collection keys4LargeReferences(int minsize, long maxtime) throws IOException { + long timeout = System.currentTimeMillis() + maxtime; + ArrayList keys = new ArrayList(); + Iterator ci = this.ram.keys(); + while (ci.hasNext()) { + byte[] k = ci.next().asBytes(); + if (this.ram.count(k) >= minsize) keys.add(k); + } + CloneableIterator ki = this.array.keys(true, false); + while (ki.hasNext()) { + byte[] k = ki.next(); + if (this.array.count(k) >= minsize) 
keys.add(k); + if (System.currentTimeMillis() > timeout) break; + } + return keys; + } + + /* * methods to implement Index */ diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainer.java b/source/net/yacy/kelondro/rwi/ReferenceContainer.java index d33cdec49..5c928a6d6 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainer.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainer.java @@ -210,6 +210,7 @@ public class ReferenceContainer extends RowSet } sort(); } + trim(); return oldsize - size(); } diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java index 70b6def32..7ac75d587 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java @@ -390,6 +390,10 @@ public final class ReferenceContainerArray { public ReferenceContainer reduce(ReferenceContainer container); } + + public CloneableIterator keys(boolean up, boolean rotating) throws IOException { + return this.array.keys(up, rotating); + } public int entries() { return this.array.entries(); diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java index b3809c1a7..b94784d72 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java @@ -106,6 +106,10 @@ public final class ReferenceContainerCache exte public synchronized void close() { this.cache = null; } + + public Iterator keys() { + return this.cache.keySet().iterator(); + } /** * dump the cache to a file. 
This method can be used in a destructive way diff --git a/source/net/yacy/search/ResourceObserver.java b/source/net/yacy/search/ResourceObserver.java index f7941a09f..d156f9e4c 100644 --- a/source/net/yacy/search/ResourceObserver.java +++ b/source/net/yacy/search/ResourceObserver.java @@ -25,33 +25,43 @@ package net.yacy.search; import java.io.File; +import java.io.IOException; import org.apache.commons.io.FileUtils; import net.yacy.cora.document.WordCache; import net.yacy.cora.protocol.Domains; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.crawler.data.Cache; +import net.yacy.crawler.data.ResultURLs; +import net.yacy.data.WorkTables; +import net.yacy.kelondro.data.word.WordReference; +import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.util.MemoryControl; +import net.yacy.peers.NewsPool; import net.yacy.search.query.SearchEventCache; public class ResourceObserver { public static final ConcurrentLog log = new ConcurrentLog("RESOURCE OBSERVER"); - // return values for available disk/memory + // status type for which shows where in the control-circuit model a memory state can be categorized public enum Space implements Comparable { - LOW, MEDIUM, HIGH; // according to the order of the definition, LOW is smaller than MEDIUM and MEDIUM is smaller than HIGH + EXHAUSTED, // smallest space state, outside of over/undershot + NOMINAL, // wanted-space state between steady-state and under/overshot + AMPLE; // largest space state, below steady-state } private final Switchboard sb; private final File path; // path to check - private Space normalizedDiskFree = Space.HIGH; - private Space normalizedMemoryFree = Space.HIGH; + private Space normalizedDiskFree = Space.AMPLE; + private Space normalizedDiskUsed = Space.AMPLE; + private Space normalizedMemoryFree = Space.AMPLE; public ResourceObserver(final Switchboard sb) { this.sb = sb; - this.path = sb.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, ""); + this.path = 
sb.getDataPath(SwitchboardConstants.INDEX_PRIMARY_PATH, "").getParentFile(); log.info("path for disc space measurement: " + this.path); } @@ -67,13 +77,18 @@ public class ResourceObserver { public void resourceObserverJob() { MemoryControl.setProperMbyte(getMinFreeMemory()); - this.normalizedDiskFree = getNormalizedDiskFree(); + this.normalizedDiskFree = getNormalizedDiskFree(); + this.normalizedDiskUsed = getNormalizedDiskUsed(true); this.normalizedMemoryFree = getNormalizedMemoryFree(); - if (this.normalizedDiskFree.compareTo(Space.HIGH) < 0 || this.normalizedMemoryFree.compareTo(Space.HIGH) < 0 ) { + // take actions if disk space is below AMPLE + if (this.normalizedDiskFree != Space.AMPLE || + this.normalizedDiskUsed != Space.AMPLE || + this.normalizedMemoryFree != Space.AMPLE ) { String reason = ""; - if (this.normalizedDiskFree.compareTo(Space.HIGH) < 0) reason += " not enough disk space, " + getUsableSpace(); - if (this.normalizedMemoryFree.compareTo(Space.HIGH) < 0 ) reason += " not enough memory space"; + if (this.normalizedDiskFree != Space.AMPLE) reason += " not enough disk space, " + getUsableSpace(); + if (this.normalizedDiskUsed != Space.AMPLE) reason += " too high disk usage, " + getNormalizedDiskUsed(true); + if (this.normalizedMemoryFree != Space.AMPLE ) reason += " not enough memory space"; if (!this.sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) { log.info("pausing local crawls"); this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, "resource observer:" + reason); @@ -83,7 +98,7 @@ public class ResourceObserver { this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "resource observer:" + reason); } - if ((this.normalizedDiskFree == Space.LOW || this.normalizedMemoryFree.compareTo(Space.HIGH) < 0) && this.sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) { + if ((this.normalizedDiskFree == Space.EXHAUSTED || this.normalizedMemoryFree != Space.AMPLE) && 
this.sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) { log.info("disabling index receive"); this.sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false); this.sb.peers.mySeed().setFlagAcceptRemoteIndex(false); @@ -91,21 +106,130 @@ public class ResourceObserver { } } - else { - if(this.sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false)) { // we were wrong! - log.info("enabling index receive"); - this.sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true); - this.sb.peers.mySeed().setFlagAcceptRemoteIndex(true); - this.sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false); - } - log.info("resources ok"); - } + // shrink resources if space is EXHAUSTED + if ((this.normalizedDiskFree == Space.EXHAUSTED && this.sb.getConfigBool(SwitchboardConstants.RESOURCE_DISK_FREE_AUTOREGULATE, false)) || + (this.normalizedDiskUsed == Space.EXHAUSTED && this.sb.getConfigBool(SwitchboardConstants.RESOURCE_DISK_USED_AUTOREGULATE, false))) { + shrinkmethods: while (true /*this is not a loop, just a construct that we can leave with a break*/) { + // delete old releases + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // delete fetched snippets + log.info("DISK SPACE EXHAUSTED - deleting snippet cache"); + sb.tables.clear(WorkTables.TABLE_SEARCH_FAILURE_NAME); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // clear HTCACHE + log.info("DISK SPACE EXHAUSTED - deleting HTCACHE"); + Cache.clear(); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // delete logs + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // delete robots.txt + log.info("DISK SPACE EXHAUSTED - deleting robots.txt database"); + try {sb.robots.clear();} catch (final IOException e) {} + if (getNormalizedDiskFree() == Space.AMPLE && 
getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // delete news + log.info("DISK SPACE EXHAUSTED - deleting News database"); + sb.peers.newsPool.clear(NewsPool.INCOMING_DB); sb.peers.newsPool.clear(NewsPool.PROCESSED_DB); + sb.peers.newsPool.clear(NewsPool.OUTGOING_DB); sb.peers.newsPool.clear(NewsPool.PUBLISHED_DB); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // clear citations + if (sb.index.connectedCitation()) { + log.info("DISK SPACE EXHAUSTED - deleting citations"); + try {sb.index.urlCitation().clear();} catch (final IOException e) {} + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + } + + // throw away crawl queues, if they are large + if (sb.crawlQueues.coreCrawlJobSize() > 1000) { + log.info("DISK SPACE EXHAUSTED - deleting crawl queues"); + sb.crawlQueues.clear(); + sb.crawlStacker.clear(); + ResultURLs.clearStacks(); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + } + + // cut away too large RWIs + IndexCell termIndex = sb.index.termIndex(); + try { + int shrinkedReferences = termIndex.deleteOld(100, 10000); + if (shrinkedReferences > 0) { + log.info("DISK SPACE EXHAUSTED - shrinked " + shrinkedReferences + " RWI references to a maximum of 100"); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + } + } catch (IOException e) { + } + + // delete too old RWIs + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // delete fulltext from large Solr documents + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + + // run a solr optimize + this.sb.index.fulltext().commit(false); + this.sb.index.fulltext().optimize(1); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break shrinkmethods; + + /* + 
// delete old Solr documents + long day = 1000 * 60 * 60 * 24; + for (int t = 12; t >= 1 ; t --) { + log.info("DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " months"); + this.sb.index.fulltext().deleteOldDocuments(t * 30 * day, true); + this.sb.index.fulltext().commit(false); + this.sb.index.fulltext().optimize(1); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break shrinkmethods; + } + for (int t = 30; t > 3 ; t --) { + log.info("DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " days"); + this.sb.index.fulltext().deleteOldDocuments(t * day, true); + this.sb.index.fulltext().commit(false); + this.sb.index.fulltext().optimize(1); + if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break shrinkmethods; + } + */ + + // WE SHOULD NEVER GET UP TO HERE... + /* + // delete ALL RWIs + if (sb.index.termIndex() != null) { + try {sb.index.termIndex().clear();} catch (final IOException e) {} + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + } + + // delete full Solr + try {sb.index.fulltext().clearLocalSolr();} catch (final IOException e) {} + //if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break; + */ + break; // DO NOT REMOVE THIS, the loop may run forever. It shall run only once. + } + this.normalizedDiskFree = getNormalizedDiskFree(); + this.normalizedDiskUsed = getNormalizedDiskUsed(false); + this.normalizedMemoryFree = getNormalizedMemoryFree(); + } + + // normalize state if the resources are AMPLE + if (this.normalizedDiskFree == Space.AMPLE && this.normalizedDiskUsed == Space.AMPLE && this.normalizedMemoryFree == Space.AMPLE ) { + if(this.sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false)) { // we were wrong! 
+ log.info("enabling index receive"); + this.sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true); + this.sb.peers.mySeed().setFlagAcceptRemoteIndex(true); + this.sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false); + } + log.info("resources ok"); + } + } private long sizeOfDirectory_lastCountTime = 0; private long sizeOfDirectory_lastCountValue = 0; - public long getSizeOfDataPath() { - if (System.currentTimeMillis() - this.sizeOfDirectory_lastCountTime < 60000) return this.sizeOfDirectory_lastCountValue; + public long getSizeOfDataPath(final boolean cached) { + if (cached && System.currentTimeMillis() - this.sizeOfDirectory_lastCountTime < 600000) return this.sizeOfDirectory_lastCountValue; this.sizeOfDirectory_lastCountTime = System.currentTimeMillis(); this.sizeOfDirectory_lastCountValue = FileUtils.sizeOfDirectory(this.path); return this.sizeOfDirectory_lastCountValue; @@ -114,6 +238,25 @@ public class ResourceObserver { public long getUsableSpace() { return this.path.getUsableSpace(); } + + private Space getNormalizedDiskUsed(final boolean cached) { + final long currentUsed = getSizeOfDataPath(cached); + //final long currentSpace = getUsableSpace(this.path); + if (currentUsed < 1L) return Space.AMPLE; + Space ret = Space.AMPLE; + + if (currentUsed > getMaxUsedDiskOvershot()) { + log.warn("Volume " + this.path.toString() + ": used space (" + (currentUsed / 1024 / 1024) + " MB) is too high (> " + (getMaxUsedDiskOvershot() / 1024 / 1024) + " MB)"); + ret = Space.EXHAUSTED; + return ret; + } + if (currentUsed > getMaxUsedDiskSteadystate()) { + log.info("Volume " + this.path.toString() + ": used space (" + (currentUsed / 1024 / 1024) + " MB) is high, but nominal (> " + (getMaxUsedDiskSteadystate() / 1024 / 1024) + " MB)"); + ret = Space.NOMINAL; + return ret; + } + return ret; + } /** * returns the amount of disk space available @@ -126,22 +269,24 @@ public class ResourceObserver { private Space getNormalizedDiskFree() { final long 
currentSpace = getUsableSpace(); //final long currentSpace = getUsableSpace(this.path); - if (currentSpace < 1L) return Space.HIGH; - Space ret = Space.HIGH; + if (currentSpace < 1L) return Space.AMPLE; // this happens if the function does not work, like on Windows + Space ret = Space.AMPLE; - if (currentSpace < getMinFreeDiskSpace()) { - log.warn("Volume " + this.path.toString() + ": free space (" + (currentSpace / 1024 / 1024) + " MB) is low (< " + (getMinFreeDiskSpace() / 1024 / 1024) + " MB)"); - ret = Space.MEDIUM; - } - if (currentSpace < getMinFreeDiskSpace_hardlimit()) { - log.warn("Volume " + this.path.toString() + ": free space (" + (currentSpace / 1024 / 1024) + " MB) is too low (< " + (getMinFreeDiskSpace() / 1024 / 1024) + " MB)"); - ret = Space.LOW; + if (currentSpace < getMinFreeDiskUndershot()) { + log.warn("Volume " + this.path.toString() + ": free space (" + (currentSpace / 1024 / 1024) + " MB) is too low (< " + (getMinFreeDiskSteadystate() / 1024 / 1024) + " MB)"); + ret = Space.EXHAUSTED; + return ret; + } + if (currentSpace < getMinFreeDiskSteadystate()) { + log.info("Volume " + this.path.toString() + ": free space (" + (currentSpace / 1024 / 1024) + " MB) is low, but nominal (< " + (getMinFreeDiskSteadystate() / 1024 / 1024) + " MB)"); + ret = Space.NOMINAL; + return ret; } return ret; } private Space getNormalizedMemoryFree() { - if(MemoryControl.properState()) return Space.HIGH; + if(MemoryControl.properState()) return Space.AMPLE; // clear some caches - @all: are there more of these, we could clear here? this.sb.index.clearCaches(); @@ -151,35 +296,49 @@ public class ResourceObserver { WordCache.clearCommonWords(); Domains.clear(); - return MemoryControl.properState()? Space.HIGH : Space.LOW; + return MemoryControl.properState()? 
Space.AMPLE : Space.EXHAUSTED; } /** * @return true if disk space is available */ public boolean getDiskAvailable() { - return this.normalizedDiskFree == Space.HIGH; + return this.normalizedDiskFree == Space.AMPLE; } /** * @return true if memory is available */ public boolean getMemoryAvailable() { - return this.normalizedMemoryFree == Space.HIGH; + return this.normalizedMemoryFree == Space.AMPLE; + } + + /** + * @return amount of space (bytes) that should be used in steady state + */ + public long getMaxUsedDiskSteadystate() { + return this.sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_USED_MAX_STEADYSTATE, 524288) /* MB */ * 1024L * 1024L; } /** - * @return amount of space (bytes) that should be kept free + * @return amount of space (bytes) that should at least be kept free as hard limit; the limit when autoregulation to steady state should start */ - public long getMinFreeDiskSpace() { - return this.sb.getConfigLong(SwitchboardConstants.DISK_FREE, 3000) /* MiB */ * 1024L * 1024L; + public long getMaxUsedDiskOvershot() { + return this.sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_USED_MAX_OVERSHOT, 1048576) /* MB */ * 1024L * 1024L; + } + + /** + * @return amount of space (bytes) that should be kept free as steady state + */ + public long getMinFreeDiskSteadystate() { + return this.sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_STEADYSTATE, 2048) /* MB */ * 1024L * 1024L; } /** - * @return amount of space (bytes) that should at least be kept free + * @return amount of space (bytes) that should at least be kept free as hard limit; the limit when autoregulation to steady state should start */ - public long getMinFreeDiskSpace_hardlimit() { - return this.sb.getConfigLong(SwitchboardConstants.DISK_FREE_HARDLIMIT, 100) /* MiB */ * 1024L * 1024L; + public long getMinFreeDiskUndershot() { + return this.sb.getConfigLong(SwitchboardConstants.RESOURCE_DISK_FREE_MIN_UNDERSHOT, 1024) /* MB */ * 1024L * 1024L; } /** @@ -189,26 +348,4 @@ public class 
ResourceObserver { return this.sb.getConfigLong(SwitchboardConstants.MEMORY_ACCEPTDHT, 0); } - - /** - * This method calls File.getUsableSpace() from Java 6. - * @param file the path to be checked - * @return "The number of available bytes on the partition or 0L if the abstract pathname does not name a partition." -1L on error. - * @author lotus at mail.berlios.de - */ - /** - public static long getUsableSpace(final File file) { - try { - final Class File6 = Class.forName("java.io.File"); - final Class[] param = {File.class, String.class }; - final Constructor File6Constructor = File6.getConstructor(param); - final Object file6 = File6Constructor.newInstance(file, ""); - final Method getFreeSpace = file6.getClass().getMethod("getUsableSpace", (Class[])null); - final Object space = getFreeSpace.invoke(file6, (Object[])null); - return Long.parseLong(space.toString()); - } catch (final Throwable e) { - return -1L; - } - } - */ } diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index c9a47347b..47ccea051 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -440,10 +440,18 @@ public final class SwitchboardConstants { /** * ResourceObserver + * We apply the naming of control circuit states to resources observer limit values (steady-state value, over/undershot) + * under/overshot states in the system are supposed to be regulated to match the steady-state value + * ATTENTION: be aware that using the autoregulate-option causes that the search index data is DELETED as soon as threshold-values are reached! 
*/ - public static final String DISK_FREE = "disk.free"; - public static final String DISK_FREE_HARDLIMIT = "disk.free.hardlimit"; - + public static final String RESOURCE_DISK_FREE_AUTOREGULATE = "resource.disk.free.autoregulate"; + public static final String RESOURCE_DISK_FREE_MIN_STEADYSTATE = "resource.disk.free.min.steadystate"; // the target steady-state of minimum disk space left + public static final String RESOURCE_DISK_FREE_MIN_UNDERSHOT = "resource.disk.free.min.undershot"; // the undershot below the steady-state of minimum disk free as absolute size + + public static final String RESOURCE_DISK_USED_AUTOREGULATE = "resource.disk.used.autoregulate"; + public static final String RESOURCE_DISK_USED_MAX_STEADYSTATE = "resource.disk.used.max.steadystate"; + public static final String RESOURCE_DISK_USED_MAX_OVERSHOT = "resource.disk.used.max.overshot"; + public static final String MEMORY_ACCEPTDHT = "memory.acceptDHTabove"; public static final String INDEX_RECEIVE_AUTODISABLED = "memory.disabledDHT"; diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index ce97cda68..44270bee9 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -459,6 +459,18 @@ public final class Fulltext { } } + public void deleteOldDocuments(final long deltaToNow, final boolean loaddate) { + Date deleteageDate = new Date(System.currentTimeMillis() - deltaToNow); + final String collection1Query = (loaddate ? CollectionSchema.load_date_dt : CollectionSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]"; + final String webgraphQuery = (loaddate ? 
WebgraphSchema.load_date_dt : WebgraphSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]"; + try { + this.getDefaultConnector().deleteByQuery(collection1Query); + if (this.getWebgraphConnector() != null) this.getWebgraphConnector().deleteByQuery(webgraphQuery); + } catch (final IOException e) { + } + } + + /** * remove a full subpath from the index * @param subpath the left path of the url; at least until the end of the host