diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java
index ba75b72d4..49f50cb41 100644
--- a/source/net/yacy/crawler/CrawlSwitchboard.java
+++ b/source/net/yacy/crawler/CrawlSwitchboard.java
@@ -29,16 +29,22 @@ package net.yacy.crawler;
 import java.io.File;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
+import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.order.NaturalOrder;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.data.CrawlProfile;
+import net.yacy.crawler.data.CrawlQueues;
+import net.yacy.crawler.data.NoticedURL.StackType;
+import net.yacy.crawler.retrieval.Request;
 import net.yacy.kelondro.blob.MapHeap;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.logging.Log;
@@ -158,7 +164,7 @@ public final class CrawlSwitchboard {
                 m = null;
             }
             if ( m == null ) {
-                return null;
+                return getPassive(profileKey);
             }
             p = new CrawlProfile(m);
             this.profilesActiveCrawlsCache.put(profileKey, p);
@@ -464,6 +470,69 @@ public final class CrawlSwitchboard {
         return hasDoneSomething;
     }
 
+    /**
+     * Finds active crawl profiles that no queued crawl request refers to any more,
+     * stores each of them with putPassive and removes it with removeActive.
+     * <p>
+     * Profiles whose name equals one of the CRAWL_PROFILE_* names tested below are never
+     * touched. Scanning the crawl queues is limited to one minute; if that limit is hit,
+     * or the scan fails, no profile is moved and 0 is returned.
+     *
+     * @param crawlQueues the crawl queues whose noticeURL stacks are scanned for profile handles
+     * @return the number of profiles that were actually moved from active to passive
+     */
+    public int cleanFinishesProfiles(CrawlQueues crawlQueues) {
+        // find all profiles that are candidates for deletion
+        final Set<String> deletionCandidate = new HashSet<String>();
+        for (final byte[] handle: this.getActive()) {
+            final CrawlProfile entry = this.getActive(handle);
+            if (entry == null) continue; // no profile found for this handle; nothing to move
+            final String name = entry.name();
+            if (!(name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA)
+                    || name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE))) {
+                deletionCandidate.add(ASCII.String(handle));
+            }
+        }
+        if (deletionCandidate.isEmpty()) return 0;
+
+        // iterate through all the queues and see if one of these handles appear there
+        // this is a time-consuming process, set a time-out
+        final long timeout = System.currentTimeMillis() + 60000L; // one minute time
+        try {
+            for (final StackType stack: StackType.values()) {
+                final Iterator<Request> sei = crawlQueues.noticeURL.iterator(stack);
+                if (sei == null) continue;
+                while (sei.hasNext()) {
+                    // a handle that is still referenced by a queued request must stay active
+                    deletionCandidate.remove(sei.next().profileHandle());
+                    if (deletionCandidate.isEmpty()) return 0;
+                    if (System.currentTimeMillis() > timeout) return 0; // give up; this is too large
+                }
+            }
+        } catch (final Throwable e) {
+            // best effort: if the queues cannot be scanned completely, do not move any profile
+            return 0;
+        }
+
+        // all entries that are left are candidates for deletion; do that now
+        int moved = 0;
+        for (final String h: deletionCandidate) {
+            final byte[] handle = ASCII.getBytes(h);
+            final CrawlProfile p = this.getActive(handle);
+            if (p != null) {
+                this.putPassive(handle, p);
+                this.removeActive(handle);
+                moved++;
+            }
+        }
+        return moved;
+    }
+
     public synchronized void close() {
         this.profilesActiveCrawlsCache.clear();
         this.profilesActiveCrawls.close();
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index fa5af7426..dbb059283 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1625,7 +1625,7 @@ public final class Switchboard extends serverSwitch {
         }
         return this.crawler.clear();
     }
-    
+
     public synchronized void close() {
         this.log.logConfig("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:");
         MemoryTracker.stopSystemProfiling();
@@ -2124,8 +2124,10 @@ public final class Switchboard extends serverSwitch {
 
             // clean up profiles
             checkInterruption();
-            cleanProfiles();
-            
+            //cleanProfiles();
+            int cleanup = this.crawler.cleanFinishesProfiles(this.crawlQueues);
+            if (cleanup > 0) log.logInfo("cleanup removed " + cleanup + " crawl profiles");
+
             // clean up news
             checkInterruption();
             try {
diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java
index 19ee51173..16d0e0278 100644
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@@ -916,8 +916,7 @@ public final class SearchEvent {
 
             // deploy worker to get more results
             if (!anyWorkerAlive()) {
-                final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024 && SNIPPET_WORKER_THREADS >= 8) ? this.query.itemsPerPage : 0);
-                deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), neededInclPrefetch);
+                deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), this.query.neededResults());
             }
             try {entry = this.result.element(item, 50);} catch (final InterruptedException e) {break;}
 