From 7d5544e9b188fa1e60176a03f8d5fa327dda8f56 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 29 Nov 2007 02:54:59 +0000 Subject: [PATCH] added some security checks to the new remote crawl pull method to prevent the indexer from being overloaded git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4234 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../plasma/crawler/plasmaCrawlQueues.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java index 4719fd882..3a8edfcba 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java @@ -227,14 +227,23 @@ public class plasmaCrawlQueues { public boolean remoteCrawlLoaderJob() { // check if we are allowed to crawl urls provided by other peers - if (!yacyCore.seedDB.mySeed().getFlagAcceptRemoteCrawl()) return false; + if (!yacyCore.seedDB.mySeed().getFlagAcceptRemoteCrawl()) { + //this.log.logInfo("remoteCrawlLoaderJob: not done, we are not allowed to do that"); + return false; + } // check if we are a senior peer - if (!yacyCore.seedDB.mySeed().isActive()) return false; + if (!yacyCore.seedDB.mySeed().isActive()) { + //this.log.logInfo("remoteCrawlLoaderJob: not done, this should be a senior or principal peer"); + return false; + } - // check if we have an entry in the provider list, othervise fill the list + // check if we have an entry in the provider list, otherwise fill the list yacySeed seed; - if ((remoteCrawlProviderHashes.size() == 0) && (remoteTriggeredCrawlJobSize() == 0)) { + if ((remoteCrawlProviderHashes.size() == 0) && + (coreCrawlJobSize() == 0) && + (remoteTriggeredCrawlJobSize() == 0) && + (sb.queueSize() < 10)) { if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) { Iterator e = yacyCore.dhtAgent.getProvidesRemoteCrawlURLs(); while (e.hasNext()) {