Possible fix for lost crawl profile handles: the clean-up job used the wrong measurement to determine whether a crawl is still running.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6465 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent cd6745b292
commit 4c99d4683d

@ -42,7 +42,7 @@ public class IndexCreateLoaderQueue_p {
final serverObjects prop = new serverObjects();
if (sb.crawlQueues.size() == 0) {
if (sb.crawlQueues.workerSize() == 0) {
prop.put("loader-set", "0");
} else {
prop.put("loader-set", "1");

@ -318,7 +318,7 @@ public class PerformanceQueues_p {
// table thread pool settings
prop.put("pool_0_name","Crawler Pool");
prop.put("pool_0_maxActive", sb.getConfigLong("crawler.MaxActiveThreads", 0));
prop.put("pool_0_numActive",sb.crawlQueues.size());
prop.put("pool_0_numActive",sb.crawlQueues.workerSize());
final WorkflowThread httpd = sb.getThread("10_httpd");
prop.put("pool_1_name", "httpd Session Pool");

@ -286,7 +286,7 @@ public class Status {
prop.putNum("connectionsMax", httpd.getMaxSessionCount());
// Queue information
final int loaderJobCount = sb.crawlQueues.size();
final int loaderJobCount = sb.crawlQueues.workerSize();
final int loaderMaxCount = Integer.parseInt(sb.getConfig(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, "10"));
final int loaderPercent = (loaderMaxCount==0)?0:loaderJobCount*100/loaderMaxCount;
prop.putNum("loaderQueueSize", loaderJobCount);

@ -108,7 +108,7 @@ public class WatchCrawler_p {
if ((post.containsKey("autoforward")) &&
(sb.crawlQueues.coreCrawlJobSize() == 0) &&
(sb.crawlQueues.remoteTriggeredCrawlJobSize() == 0) &&
(sb.getActiveQueueSize() < 30)) {
(sb.getIndexingProcessorsQueueSize() < 30)) {
prop.put("forwardToCrawlStart", "1");
}

@ -48,9 +48,9 @@ public class queues_p {
prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());
// loader queue
prop.put("loaderSize", Integer.toString(sb.crawlQueues.size()));
prop.put("loaderSize", Integer.toString(sb.crawlQueues.workerSize()));
prop.put("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
if (sb.crawlQueues.size() == 0) {
if (sb.crawlQueues.workerSize() == 0) {
prop.put("list-loader", "0");
} else {
final Request[] w = sb.crawlQueues.activeWorkerEntries();

@ -24,7 +24,6 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import net.yacy.kelondro.logging.Log;
import net.yacy.visualization.PrintTool;
import net.yacy.visualization.RasterPlotter;
import de.anomic.http.server.RequestHeader;

@ -352,13 +352,13 @@ public class CrawlQueues {
return false;
}
if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
// try a cleanup
cleanup();
}
// check again
if (this.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + "), httpClients = " + Client.connectionCount());
if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) {
if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + Client.connectionCount());
return false;
}
@ -514,7 +514,8 @@ public class CrawlQueues {
}
}
public int size() {
public int workerSize() {
return workers.size();
}

@ -26,8 +26,6 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Properties;
import net.yacy.kelondro.logging.Log;
public class whois {
public static Properties Whois(final String dom) {

@ -661,7 +661,7 @@ public final class Switchboard extends serverSwitch {
log.logConfig("Finished Switchboard Initialization");
}
public int getActiveQueueSize() {
public int getIndexingProcessorsQueueSize() {
return
this.indexingDocumentProcessor.queueSize() +
this.indexingCondensementProcessor.queueSize() +
@ -1066,16 +1066,16 @@ public final class Switchboard extends serverSwitch {
}
/**
* {@link CrawlProfile Crawl Profiles} are saved independantly from the queues themselves
* {@link CrawlProfile Crawl Profiles} are saved independently from the queues themselves
* and therefore have to be cleaned up from time to time. This method only performs the clean-up
* if - and only if - the {@link IndexingStack switchboard},
* {@link LoaderDispatcher loader} and {@link plasmaCrawlNURL local crawl} queues are all empty.
* <p>
* Then it iterates through all existing {@link CrawlProfile crawl profiles} and removes
* all profiles which are not hardcoded.
* all profiles which are not hard-coded.
* </p>
* <p>
* <i>If this method encounters DB-failures, the profile DB will be resetted and</i>
* <i>If this method encounters DB-failures, the profile DB will be reset and</i>
* <code>true</code><i> will be returned</i>
* </p>
* @see #CRAWL_PROFILE_PROXY hardcoded
@ -1088,9 +1088,13 @@ public final class Switchboard extends serverSwitch {
* shutdown procedure
*/
public boolean cleanProfiles() throws InterruptedException {
if ((getActiveQueueSize() > 0) || (crawlQueues.size() > 0) ||
if (getIndexingProcessorsQueueSize() > 0 ||
crawlQueues.workerSize() > 0 ||
crawlQueues.coreCrawlJobSize() > 0 ||
crawlQueues.limitCrawlJobSize() > 0 ||
crawlQueues.remoteTriggeredCrawlJobSize() > 0 ||
(crawlStacker != null && crawlStacker.size() > 0) ||
(crawlQueues.noticeURL.notEmpty()))
crawlQueues.noticeURL.notEmpty())
return false;
return this.crawler.cleanProfiles();
}
@ -1916,10 +1920,10 @@ public final class Switchboard extends serverSwitch {
return "no DHT distribution: not enough words - wordIndex.size() = " + indexSegment.termIndex().sizesMax();
}
if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, "false").equalsIgnoreCase("false")) && (crawlQueues.noticeURL.notEmptyLocal())) {
return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
}
if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getActiveQueueSize() > 1)) {
return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getActiveQueueSize();
if ((getConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, "false").equalsIgnoreCase("false")) && (getIndexingProcessorsQueueSize() > 1)) {
return "no DHT distribution: indexing in progress: noticeURL.stackSize() = " + crawlQueues.noticeURL.size() + ", sbQueue.size() = " + getIndexingProcessorsQueueSize();
}
return null; // this means; yes, please do dht transfer
}

Loading…
Cancel
Save