From 8b32dd5f9ea9bc341a08a129b32e491ba0122771 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 18 Apr 2014 06:50:07 +0200 Subject: [PATCH] special strategy for balancer: do not remove targets with zero wait time from the queue --- source/net/yacy/crawler/HostBalancer.java | 14 ++++++++++---- source/net/yacy/crawler/HostQueue.java | 2 +- source/net/yacy/crawler/LegacyBalancer.java | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/source/net/yacy/crawler/HostBalancer.java b/source/net/yacy/crawler/HostBalancer.java index 18f4bd60d..222ba9d89 100644 --- a/source/net/yacy/crawler/HostBalancer.java +++ b/source/net/yacy/crawler/HostBalancer.java @@ -257,14 +257,18 @@ public class HostBalancer implements Balancer { while (i.hasNext()) { String s = i.next(); HostQueue hq = this.queues.get(s); - if (hq == null || hq.size() != 1) {i.remove();} + if (hq == null) {i.remove(); continue;} + int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); + if (hq.size() != 1 && delta > 10) {i.remove();} } } else if (smallStacksExist) { Iterator i = this.roundRobinHostHashes.iterator(); while (i.hasNext()) { String s = i.next(); HostQueue hq = this.queues.get(s); - if (hq == null || hq.size() > 10) {i.remove();} + if (hq == null) {i.remove(); continue;} + int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); + if (hq.size() > 10 && delta > 10) {i.remove();} } } } @@ -280,8 +284,8 @@ public class HostBalancer implements Balancer { nhhi.remove(); continue nosleep; } - int delta = Latency.waitingRemainingGuessed(rhq.getHost(), DigestURL.hosthash(rhq.getHost(), rhq.getPort()), robots, ClientIdentification.yacyInternetCrawlerAgent); - if (delta <= 10 || this.roundRobinHostHashes.size() == 1) { + int delta = Latency.waitingRemainingGuessed(rhq.getHost(), rhh, robots, ClientIdentification.yacyInternetCrawlerAgent); + if (delta <= 10 || this.roundRobinHostHashes.size() == 1 || rhq.size() == 1) { nhhi.remove(); break nosleep; } @@ -289,6 +293,8 @@ public class HostBalancer implements Balancer { if (rhq == null) { // second strategy: take from the largest stack and clean round robin cache + // if we would not clear the round robin cache afterwards + // then all targets would be accessed equally which makes this strategy useless int largest = Integer.MIN_VALUE; for (String h: this.roundRobinHostHashes) { HostQueue hq = this.queues.get(h); diff --git a/source/net/yacy/crawler/HostQueue.java b/source/net/yacy/crawler/HostQueue.java index c2a5c2c9b..72636333b 100644 --- a/source/net/yacy/crawler/HostQueue.java +++ b/source/net/yacy/crawler/HostQueue.java @@ -414,7 +414,7 @@ public class HostQueue implements Balancer { // if not: return null. A calling method must handle the null value and try again profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle())); if (profileEntry == null) { - ConcurrentLog.warn("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); + ConcurrentLog.fine("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); continue mainloop; } diff --git a/source/net/yacy/crawler/LegacyBalancer.java b/source/net/yacy/crawler/LegacyBalancer.java index dc8658f06..42fab99c4 100644 --- a/source/net/yacy/crawler/LegacyBalancer.java +++ b/source/net/yacy/crawler/LegacyBalancer.java @@ -387,7 +387,7 @@ public class LegacyBalancer implements Balancer { // if not: return null. A calling method must handle the null value and try again profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle())); if (profileEntry == null) { - ConcurrentLog.warn("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); + ConcurrentLog.fine("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); continue; } // depending on the caching policy we need sleep time to avoid DoS-like situations