From fa08ed5ae5d72bddc3cc6a662b23103579e86109 Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Thu, 29 Dec 2011 00:33:16 +0100 Subject: [PATCH] Fixed a lot CHMOD rights (no need for execute flag on *.java/*.html) and introduced local/remote crawl size ratio based check --- htroot/Collage.html | 0 htroot/Collage.java | 0 htroot/IndexCleaner_p.html | 0 htroot/IndexCleaner_p.java | 0 htroot/api/bookmarks/posts/add_p.java | 0 htroot/api/bookmarks/posts/all.java | 0 htroot/api/bookmarks/posts/delete_p.java | 0 htroot/api/bookmarks/posts/get.java | 0 htroot/api/bookmarks/tags/editTag_p.java | 0 htroot/api/bookmarks/tags/getTag.java | 0 htroot/api/bookmarks/xbel/xbel.java | 0 htroot/api/feed.java | 0 htroot/api/getpageinfo_p.java | 0 htroot/api/queues_p.java | 0 htroot/api/ynetSearch.java | 0 htroot/compare_yacy.html | 0 htroot/compare_yacy.java | 0 .../domaingraph/applet/domaingraph.java | 0 .../processing/domaingraph/applet/index.html | 0 htroot/rssTerminal.html | 0 htroot/terminal_p.html | 0 source/de/anomic/crawler/CrawlQueues.java | 35 +++++++++++++++---- source/de/anomic/crawler/NoticedURL.java | 0 source/de/anomic/crawler/ResultImages.java | 0 source/de/anomic/crawler/ZURL.java | 0 .../de/anomic/crawler/retrieval/Request.java | 0 .../de/anomic/crawler/retrieval/Response.java | 0 source/net/yacy/ai/example/ConnectFour.java | 0 source/net/yacy/ai/example/testorder.java | 0 .../net/yacy/ai/greedy/AbstractFinding.java | 0 source/net/yacy/ai/greedy/AbstractModel.java | 0 source/net/yacy/ai/greedy/Agent.java | 0 source/net/yacy/ai/greedy/Asset.java | 0 source/net/yacy/ai/greedy/Attempts.java | 0 source/net/yacy/ai/greedy/Battle.java | 0 source/net/yacy/ai/greedy/Challenge.java | 0 source/net/yacy/ai/greedy/Context.java | 0 source/net/yacy/ai/greedy/Engine.java | 0 source/net/yacy/ai/greedy/Finding.java | 0 source/net/yacy/ai/greedy/Goal.java | 0 source/net/yacy/ai/greedy/Model.java | 0 source/net/yacy/ai/greedy/Role.java | 0 
.../net/yacy/cora/protocol/RequestHeader.java | 0 .../yacy/cora/protocol/ResponseHeader.java | 0 source/net/yacy/kelondro/blob/ArrayStack.java | 0 source/net/yacy/kelondro/blob/Heap.java | 0 source/net/yacy/kelondro/table/Relations.java | 0 source/net/yacy/kelondro/util/ISO639.java | 0 source/net/yacy/peers/dht/Dispatcher.java | 0 .../peers/dht/FlatWordPartitionScheme.java | 0 .../net/yacy/peers/dht/PartitionScheme.java | 0 source/net/yacy/peers/dht/PeerSelection.java | 0 .../dht/VerticalWordPartitionScheme.java | 0 53 files changed, 29 insertions(+), 6 deletions(-) mode change 100755 => 100644 htroot/Collage.html mode change 100755 => 100644 htroot/Collage.java mode change 100755 => 100644 htroot/IndexCleaner_p.html mode change 100755 => 100644 htroot/IndexCleaner_p.java mode change 100755 => 100644 htroot/api/bookmarks/posts/add_p.java mode change 100755 => 100644 htroot/api/bookmarks/posts/all.java mode change 100755 => 100644 htroot/api/bookmarks/posts/delete_p.java mode change 100755 => 100644 htroot/api/bookmarks/posts/get.java mode change 100755 => 100644 htroot/api/bookmarks/tags/editTag_p.java mode change 100755 => 100644 htroot/api/bookmarks/tags/getTag.java mode change 100755 => 100644 htroot/api/bookmarks/xbel/xbel.java mode change 100755 => 100644 htroot/api/feed.java mode change 100755 => 100644 htroot/api/getpageinfo_p.java mode change 100755 => 100644 htroot/api/queues_p.java mode change 100755 => 100644 htroot/api/ynetSearch.java mode change 100755 => 100644 htroot/compare_yacy.html mode change 100755 => 100644 htroot/compare_yacy.java mode change 100755 => 100644 htroot/processing/domaingraph/applet/domaingraph.java mode change 100755 => 100644 htroot/processing/domaingraph/applet/index.html mode change 100755 => 100644 htroot/rssTerminal.html mode change 100755 => 100644 htroot/terminal_p.html mode change 100755 => 100644 source/de/anomic/crawler/NoticedURL.java mode change 100755 => 100644 source/de/anomic/crawler/ResultImages.java mode change 
100755 => 100644 source/de/anomic/crawler/ZURL.java mode change 100755 => 100644 source/de/anomic/crawler/retrieval/Request.java mode change 100755 => 100644 source/de/anomic/crawler/retrieval/Response.java mode change 100755 => 100644 source/net/yacy/ai/example/ConnectFour.java mode change 100755 => 100644 source/net/yacy/ai/example/testorder.java mode change 100755 => 100644 source/net/yacy/ai/greedy/AbstractFinding.java mode change 100755 => 100644 source/net/yacy/ai/greedy/AbstractModel.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Agent.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Asset.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Attempts.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Battle.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Challenge.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Context.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Engine.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Finding.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Goal.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Model.java mode change 100755 => 100644 source/net/yacy/ai/greedy/Role.java mode change 100755 => 100644 source/net/yacy/cora/protocol/RequestHeader.java mode change 100755 => 100644 source/net/yacy/cora/protocol/ResponseHeader.java mode change 100755 => 100644 source/net/yacy/kelondro/blob/ArrayStack.java mode change 100755 => 100644 source/net/yacy/kelondro/blob/Heap.java mode change 100755 => 100644 source/net/yacy/kelondro/table/Relations.java mode change 100755 => 100644 source/net/yacy/kelondro/util/ISO639.java mode change 100755 => 100644 source/net/yacy/peers/dht/Dispatcher.java mode change 100755 => 100644 source/net/yacy/peers/dht/FlatWordPartitionScheme.java mode change 100755 => 100644 source/net/yacy/peers/dht/PartitionScheme.java mode change 100755 => 100644 source/net/yacy/peers/dht/PeerSelection.java mode 
change 100755 => 100644 source/net/yacy/peers/dht/VerticalWordPartitionScheme.java diff --git a/htroot/Collage.html b/htroot/Collage.html old mode 100755 new mode 100644 diff --git a/htroot/Collage.java b/htroot/Collage.java old mode 100755 new mode 100644 diff --git a/htroot/IndexCleaner_p.html b/htroot/IndexCleaner_p.html old mode 100755 new mode 100644 diff --git a/htroot/IndexCleaner_p.java b/htroot/IndexCleaner_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/posts/add_p.java b/htroot/api/bookmarks/posts/add_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/posts/all.java b/htroot/api/bookmarks/posts/all.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/posts/delete_p.java b/htroot/api/bookmarks/posts/delete_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/posts/get.java b/htroot/api/bookmarks/posts/get.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/tags/editTag_p.java b/htroot/api/bookmarks/tags/editTag_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/tags/getTag.java b/htroot/api/bookmarks/tags/getTag.java old mode 100755 new mode 100644 diff --git a/htroot/api/bookmarks/xbel/xbel.java b/htroot/api/bookmarks/xbel/xbel.java old mode 100755 new mode 100644 diff --git a/htroot/api/feed.java b/htroot/api/feed.java old mode 100755 new mode 100644 diff --git a/htroot/api/getpageinfo_p.java b/htroot/api/getpageinfo_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/queues_p.java b/htroot/api/queues_p.java old mode 100755 new mode 100644 diff --git a/htroot/api/ynetSearch.java b/htroot/api/ynetSearch.java old mode 100755 new mode 100644 diff --git a/htroot/compare_yacy.html b/htroot/compare_yacy.html old mode 100755 new mode 100644 diff --git a/htroot/compare_yacy.java b/htroot/compare_yacy.java old mode 100755 new mode 100644 diff --git a/htroot/processing/domaingraph/applet/domaingraph.java 
b/htroot/processing/domaingraph/applet/domaingraph.java old mode 100755 new mode 100644 diff --git a/htroot/processing/domaingraph/applet/index.html b/htroot/processing/domaingraph/applet/index.html old mode 100755 new mode 100644 diff --git a/htroot/rssTerminal.html b/htroot/rssTerminal.html old mode 100755 new mode 100644 diff --git a/htroot/terminal_p.html b/htroot/terminal_p.html old mode 100755 new mode 100644 diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index 935636d4c..1abc88867 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -198,11 +198,7 @@ public class CrawlQueues { public boolean coreCrawlJob() { - final boolean robinsonPrivateCase = (this.sb.isRobinsonMode() && - !this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER) && - !this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_CLUSTER)); - - if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) { + if (coreCrawlJobSize() <= 20 && limitCrawlJobSize() > 0) { // move some tasks to the core crawl job so we have something to do final int toshift = Math.min(10, limitCrawlJobSize()); // this cannot be a big number because the balancer makes a forced waiting if it cannot balance for (int i = 0; i < toshift; i++) { @@ -402,7 +398,8 @@ public class CrawlQueues { return false; } - if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) { + // Determine ratio local/remote, if lower than 1000 do not do any remote jobs + if (!isLocalRemoteRatioReached()) { if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing"); return false; } @@ -645,4 +642,30 @@ public class CrawlQueues { } + /** + * To determine whether remote-crawling is omitted because of too many local crawls + * + * @return Whether
remote-crawling is omitted + */ + private boolean isLocalRemoteRatioReached () { + if (remoteTriggeredCrawlJobSize() == 0) { + // No entries in remote queue + return true; + } + + // Determine ratio + float ratio = (coreCrawlJobSize() / remoteTriggeredCrawlJobSize()); + + // Debug message + Log.logInfo(LoggerNames.LOGGER_CRAWL_QUEUES, + "isLocalRemoteRatioReached: local.size() = " + localCrawlJobSize() + + ", global.size() = " + globalCrawlJobSize() + + ", remoteTriggered.size() = " + remoteTriggeredCrawlJobSize() + + ", ratio = " + new Float(ratio).toString() + ); + + // Check for local/remote ratio is below 1,000 + return (ratio <= sb.getConfigLong("crawler.localRemoteRatio", 1000)); + } + } diff --git a/source/de/anomic/crawler/NoticedURL.java b/source/de/anomic/crawler/NoticedURL.java old mode 100755 new mode 100644 diff --git a/source/de/anomic/crawler/ResultImages.java b/source/de/anomic/crawler/ResultImages.java old mode 100755 new mode 100644 diff --git a/source/de/anomic/crawler/ZURL.java b/source/de/anomic/crawler/ZURL.java old mode 100755 new mode 100644 diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java old mode 100755 new mode 100644 diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/example/ConnectFour.java b/source/net/yacy/ai/example/ConnectFour.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/example/testorder.java b/source/net/yacy/ai/example/testorder.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/AbstractFinding.java b/source/net/yacy/ai/greedy/AbstractFinding.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/AbstractModel.java b/source/net/yacy/ai/greedy/AbstractModel.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Agent.java 
b/source/net/yacy/ai/greedy/Agent.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Asset.java b/source/net/yacy/ai/greedy/Asset.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Attempts.java b/source/net/yacy/ai/greedy/Attempts.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Battle.java b/source/net/yacy/ai/greedy/Battle.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Challenge.java b/source/net/yacy/ai/greedy/Challenge.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Context.java b/source/net/yacy/ai/greedy/Context.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Engine.java b/source/net/yacy/ai/greedy/Engine.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Finding.java b/source/net/yacy/ai/greedy/Finding.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Goal.java b/source/net/yacy/ai/greedy/Goal.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Model.java b/source/net/yacy/ai/greedy/Model.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/ai/greedy/Role.java b/source/net/yacy/ai/greedy/Role.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/cora/protocol/RequestHeader.java b/source/net/yacy/cora/protocol/RequestHeader.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/cora/protocol/ResponseHeader.java b/source/net/yacy/cora/protocol/ResponseHeader.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/kelondro/blob/ArrayStack.java b/source/net/yacy/kelondro/blob/ArrayStack.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/kelondro/blob/Heap.java b/source/net/yacy/kelondro/blob/Heap.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/kelondro/table/Relations.java b/source/net/yacy/kelondro/table/Relations.java old mode 100755 new mode 100644 
diff --git a/source/net/yacy/kelondro/util/ISO639.java b/source/net/yacy/kelondro/util/ISO639.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/peers/dht/Dispatcher.java b/source/net/yacy/peers/dht/Dispatcher.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/peers/dht/FlatWordPartitionScheme.java b/source/net/yacy/peers/dht/FlatWordPartitionScheme.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/peers/dht/PartitionScheme.java b/source/net/yacy/peers/dht/PartitionScheme.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/peers/dht/PeerSelection.java b/source/net/yacy/peers/dht/PeerSelection.java old mode 100755 new mode 100644 diff --git a/source/net/yacy/peers/dht/VerticalWordPartitionScheme.java b/source/net/yacy/peers/dht/VerticalWordPartitionScheme.java old mode 100755 new mode 100644