From 4c99d4683d78f442acbbb6813da0311d832ce587 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Fri, 6 Nov 2009 23:15:20 +0000
Subject: [PATCH] possible fix for lost crawl profile handles: the clean-up
 job used the wrong measurement to decide whether a crawl is still running.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6465 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/IndexCreateLoaderQueue_p.java      |  2 +-
 htroot/PerformanceQueues_p.java           |  2 +-
 htroot/Status.java                        |  2 +-
 htroot/WatchCrawler_p.java                |  2 +-
 htroot/api/queues_p.java                  |  4 ++--
 htroot/imagetest.java                     |  1 -
 source/de/anomic/crawler/CrawlQueues.java |  9 +++++----
 source/de/anomic/net/whois.java           |  2 --
 source/de/anomic/search/Switchboard.java  | 22 +++++++++++++---------
 9 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java
index 9b8649730..9acee7df4 100644
--- a/htroot/IndexCreateLoaderQueue_p.java
+++ b/htroot/IndexCreateLoaderQueue_p.java
@@ -42,7 +42,7 @@ public class IndexCreateLoaderQueue_p {
 
         final serverObjects prop = new serverObjects();
 
-        if (sb.crawlQueues.size() == 0) {
+        if (sb.crawlQueues.workerSize() == 0) {
             prop.put("loader-set", "0");
         } else {
             prop.put("loader-set", "1");
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 2725947a2..ab1fc226e 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -318,7 +318,7 @@ public class PerformanceQueues_p {
         // table thread pool settings
         prop.put("pool_0_name","Crawler Pool");
         prop.put("pool_0_maxActive", sb.getConfigLong("crawler.MaxActiveThreads", 0));
-        prop.put("pool_0_numActive",sb.crawlQueues.size());
+        prop.put("pool_0_numActive",sb.crawlQueues.workerSize());
 
         final WorkflowThread httpd = sb.getThread("10_httpd");
         prop.put("pool_1_name", "httpd Session Pool");
diff --git a/htroot/Status.java b/htroot/Status.java
index a30ebc556..98cf97cd4 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -286,7 +286,7 @@ public class Status {
         prop.putNum("connectionsMax", httpd.getMaxSessionCount());
 
         // Queue information
-        final int loaderJobCount = sb.crawlQueues.size();
+        final int loaderJobCount = sb.crawlQueues.workerSize();
         final int loaderMaxCount = Integer.parseInt(sb.getConfig(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, "10"));
         final int loaderPercent = (loaderMaxCount==0)?0:loaderJobCount*100/loaderMaxCount;
         prop.putNum("loaderQueueSize", loaderJobCount);
diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java
index 0c7fdae43..0c753eb9b 100644
--- a/htroot/WatchCrawler_p.java
+++ b/htroot/WatchCrawler_p.java
@@ -108,7 +108,7 @@ public class WatchCrawler_p {
             if ((post.containsKey("autoforward")) &&
                 (sb.crawlQueues.coreCrawlJobSize() == 0) &&
                 (sb.crawlQueues.remoteTriggeredCrawlJobSize() == 0) &&
-                (sb.getActiveQueueSize() < 30)) {
+                (sb.getIndexingProcessorsQueueSize() < 30)) {
                 prop.put("forwardToCrawlStart", "1");
             }
diff --git a/htroot/api/queues_p.java b/htroot/api/queues_p.java
index b86fcc658..decbcdcbb 100755
--- a/htroot/api/queues_p.java
+++ b/htroot/api/queues_p.java
@@ -48,9 +48,9 @@ public class queues_p {
         prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());
 
         // loader queue
-        prop.put("loaderSize", Integer.toString(sb.crawlQueues.size()));
+        prop.put("loaderSize", Integer.toString(sb.crawlQueues.workerSize()));
         prop.put("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
-        if (sb.crawlQueues.size() == 0) {
+        if (sb.crawlQueues.workerSize() == 0) {
prop.put("list-loader", "0"); } else { final Request[] w = sb.crawlQueues.activeWorkerEntries(); diff --git a/htroot/imagetest.java b/htroot/imagetest.java index 0f811c8d6..96292c904 100644 --- a/htroot/imagetest.java +++ b/htroot/imagetest.java @@ -24,7 +24,6 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import net.yacy.kelondro.logging.Log; import net.yacy.visualization.PrintTool; import net.yacy.visualization.RasterPlotter; import de.anomic.http.server.RequestHeader; diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index abea1762f..af026bfd3 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -352,13 +352,13 @@ public class CrawlQueues { return false; } - if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) { + if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) { // try a cleanup cleanup(); } // check again - if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) { - if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + "), httpClients = " + Client.connectionCount()); + if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) { + if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + Client.connectionCount()); return false; } @@ -514,7 +514,8 @@ public class CrawlQueues { } } - public int size() { + + public int workerSize() { return workers.size(); } diff --git a/source/de/anomic/net/whois.java b/source/de/anomic/net/whois.java index 9308c6a6b..53cc71bc4 100644 --- a/source/de/anomic/net/whois.java +++ b/source/de/anomic/net/whois.java @@ -26,8 +26,6 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.Properties; -import net.yacy.kelondro.logging.Log; - public class whois { public static Properties Whois(final String dom) { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index a4dfb723e..dbb4b6238 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -661,7 +661,7 @@ public final class Switchboard extends serverSwitch { log.logConfig("Finished Switchboard Initialization"); } - public int getActiveQueueSize() { + public int getIndexingProcessorsQueueSize() { return this.indexingDocumentProcessor.queueSize() + this.indexingCondensementProcessor.queueSize() + @@ -1066,16 +1066,16 @@ public final class Switchboard extends serverSwitch { } /** - * {@link CrawlProfile Crawl Profiles} are saved independantly from the queues themselves + * {@link CrawlProfile Crawl Profiles} are saved independently from the queues themselves * and therefore have to be cleaned up from time to time. This method only performs the clean-up * if - and only if - the {@link IndexingStack switchboard}, * {@link LoaderDispatcher loader} and {@link plasmaCrawlNURL local crawl} queues are all empty. *

* Then it iterates through all existing {@link CrawlProfile crawl profiles} and removes - * all profiles which are not hardcoded. + * all profiles which are not hard-coded. *

*

- * If this method encounters DB-failures, the profile DB will be resetted and + * If this method encounters DB-failures, the profile DB will be reseted and * true will be returned *

     * @see #CRAWL_PROFILE_PROXY hardcoded
     * @see #CRAWL_PROFILE_REMOTE hardcoded
     * @see #CRAWL_PROFILE_SNIPPET_TEXT hardcoded
     * @return true, if cleanup was successfull and false otherwise
     * @throws InterruptedException if the current thread has been interrupted, i.e. by the
@@ -1088,9 +1088,13 @@ public final class Switchboard extends serverSwitch {
      * shutdown procedure
      */
     public boolean cleanProfiles() throws InterruptedException {
-        if ((getActiveQueueSize() > 0) || (crawlQueues.size() > 0) ||
+        if (getIndexingProcessorsQueueSize() > 0 ||
+            crawlQueues.workerSize() > 0 ||
+            crawlQueues.coreCrawlJobSize() > 0 ||
+            crawlQueues.limitCrawlJobSize() > 0 ||
+            crawlQueues.remoteTriggeredCrawlJobSize() > 0 ||
             (crawlStacker != null && crawlStacker.size() > 0) ||
-            (crawlQueues.noticeURL.notEmpty()))
+            crawlQueues.noticeURL.notEmpty())
             return false;
         return this.crawler.cleanProfiles();
     }
@@ -1916,10 +1920,10 @@ public final class Switchboard extends serverSwitch {
             return "no DHT distribution: not enough words - wordIndex.size() = " + indexSegment.termIndex().sizesMax();
         }
         if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, "false").equalsIgnoreCase("false")) && (crawlQueues.noticeURL.notEmptyLocal())) {
-            return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
+            return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
         }
-        if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getActiveQueueSize() > 1)) {
-            return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
+        if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getIndexingProcessorsQueueSize() > 1)) {
+            return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
         }
         return null; // this means; yes, please do dht transfer
     }
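
Most of the call-site churn above comes from renaming CrawlQueues.size() to
workerSize(): the method only ever counted active loader threads, a fact the old
name hid at call sites that read like queue-length checks. Below is a minimal
sketch of the distinction the new name makes explicit; the class and field names
are hypothetical stand-ins, not YaCy's real API.

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    // Sketch only: field and type names are invented for illustration.
    final class CrawlQueuesSketch {
        // loader threads currently fetching a page
        private final Map<Integer, Thread> workers = new ConcurrentHashMap<>();
        // URLs queued for crawling but not yet handed to a worker
        private final Deque<String> noticeUrlStack = new ArrayDeque<>();

        // Old name: size(). It answers "how many loader threads are running",
        // not "how much crawl work is left"; the new name says so.
        int workerSize() {
            return workers.size();
        }

        // A crawl can still be running while zero workers are active, e.g. in
        // the moment between one fetch finishing and the next URL being
        // dequeued. Judging "finished" by the worker count alone is the wrong
        // measurement named in the commit message.
        boolean crawlStillRunning() {
            return !workers.isEmpty() || !noticeUrlStack.isEmpty();
        }
    }

The same reading applies at every former size() call site in the patch (Status.java,
PerformanceQueues_p.java, IndexCreateLoaderQueue_p.java, queues_p.java): they all
report active loader slots, not outstanding crawl work.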
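
The functional change sits in Switchboard.cleanProfiles(): the guard now measures
the core, limit and remote-triggered crawl job stacks explicitly instead of leaning
on the ambiguous size(). A condensed sketch of the corrected condition follows,
using an invented interface in place of the real Switchboard/CrawlQueues methods;
the method names mirror the patch, the surrounding type does not exist in YaCy.

    // Sketch only: QueueSizes stands in for Switchboard plus CrawlQueues.
    public final class ProfileCleanupGuard {

        public interface QueueSizes {
            int indexingProcessorsQueueSize(); // documents still being indexed
            int workerSize();                  // active loader threads
            int coreCrawlJobSize();            // queued local crawl entries
            int limitCrawlJobSize();           // queued entries beyond the crawl limit
            int remoteTriggeredCrawlJobSize(); // queued remotely triggered entries
            int crawlStackerSize();            // URLs still being stacked
            boolean noticeUrlNotEmpty();       // anything left on the notice-URL stack
        }

        // The pre-patch guard checked the indexing queue, the worker count, the
        // crawl stacker and the notice-URL stack; the patch adds the three crawl
        // job stacks as explicit checks, so a crawl whose URLs are queued but not
        // currently being loaded no longer looks finished.
        public static boolean mayCleanProfiles(final QueueSizes q) {
            return q.indexingProcessorsQueueSize() == 0
                && q.workerSize() == 0
                && q.coreCrawlJobSize() == 0
                && q.limitCrawlJobSize() == 0
                && q.remoteTriggeredCrawlJobSize() == 0
                && q.crawlStackerSize() == 0
                && !q.noticeUrlNotEmpty();
        }
    }

Under this stricter guard a crawl profile keeps its handle until every stack that
could still reference it has drained, which is the lost-handle scenario the commit
message describes.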