diff --git a/source/net/yacy/crawler/HostBalancer.java b/source/net/yacy/crawler/HostBalancer.java index 8cc9b2d9a..5a3671f2e 100644 --- a/source/net/yacy/crawler/HostBalancer.java +++ b/source/net/yacy/crawler/HostBalancer.java @@ -297,7 +297,7 @@ public class HostBalancer implements Balancer { String s = i.next(); HostQueue hq = this.queues.get(s); if (hq == null) {i.remove(); continue smallstacks;} - int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); + int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); if (delta < 0) continue; // keep all non-waiting stacks; they are useful to speed up things // to protect all small stacks which have a fast throughput, remove all with long waiting time if (delta >= 1000) {i.remove(); continue smallstacks;} @@ -332,7 +332,7 @@ public class HostBalancer implements Balancer { mixedstrategy: for (String h: this.roundRobinHostHashes) { HostQueue hq = this.queues.get(h); if (hq != null) { - int delta = Latency.waitingRemainingGuessed(hq.getHost(), h, robots, ClientIdentification.yacyInternetCrawlerAgent) / 200; + int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), h, robots, ClientIdentification.yacyInternetCrawlerAgent) / 200; if (delta < 0) delta = 0; List queueHashes = fastTree.get(delta); if (queueHashes == null) { @@ -427,7 +427,7 @@ public class HostBalancer implements Balancer { String s = i.next(); HostQueue hq = this.queues.get(s); if (hq == null) {i.remove(); continue protectcheck;} - int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); + int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); if (delta >= 0) {i.remove();} } } @@ -488,7 +488,7 @@ public class HostBalancer implements Balancer { public Map getDomainStackHosts(RobotsTxt robots) { Map map = new TreeMap(); // we use a tree map to get a stable ordering for (HostQueue hq: this.queues.values()) try { - int delta = Latency.waitingRemainingGuessed(hq.getHost(), DigestURL.hosthash(hq.getHost(), hq.getPort()), robots, ClientIdentification.yacyInternetCrawlerAgent); + int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), DigestURL.hosthash(hq.getHost(), hq.getPort()), robots, ClientIdentification.yacyInternetCrawlerAgent); map.put(hq.getHost() + ":" + hq.getPort(), new Integer[]{hq.size(), delta}); } catch (MalformedURLException e) { ConcurrentLog.logException(e); diff --git a/source/net/yacy/crawler/HostQueue.java b/source/net/yacy/crawler/HostQueue.java index 105b0c53d..88c380292 100644 --- a/source/net/yacy/crawler/HostQueue.java +++ b/source/net/yacy/crawler/HostQueue.java @@ -518,7 +518,7 @@ public class HostQueue implements Balancer { @Override public Map getDomainStackHosts(RobotsTxt robots) { Map map = new TreeMap(); - int delta = Latency.waitingRemainingGuessed(this.hostName, this.hostHash, robots, ClientIdentification.yacyInternetCrawlerAgent); + int delta = Latency.waitingRemainingGuessed(this.hostName, this.port, this.hostHash, robots, ClientIdentification.yacyInternetCrawlerAgent); map.put(this.hostName, new Integer[]{this.size(), delta}); return map; } diff --git a/source/net/yacy/crawler/LegacyBalancer.java b/source/net/yacy/crawler/LegacyBalancer.java index c679e859c..5a885e446 100644 --- a/source/net/yacy/crawler/LegacyBalancer.java +++ b/source/net/yacy/crawler/LegacyBalancer.java @@ -283,7 +283,7 @@ public class LegacyBalancer implements Balancer { final String hostname = entry.getKey(); final HostHandles hosthandles = entry.getValue(); int size = hosthandles.handleSet.size(); - int delta = Latency.waitingRemainingGuessed(hostname, hosthandles.hosthash, robots, ClientIdentification.yacyInternetCrawlerAgent); + int delta = Latency.waitingRemainingGuessed(hostname, 80, hosthandles.hosthash, robots, ClientIdentification.yacyInternetCrawlerAgent); map.put(hostname, new Integer[]{size, delta}); } return map; diff --git a/source/net/yacy/crawler/data/Latency.java b/source/net/yacy/crawler/data/Latency.java index e73ab95df..f524f5bfb 100644 --- a/source/net/yacy/crawler/data/Latency.java +++ b/source/net/yacy/crawler/data/Latency.java @@ -146,7 +146,7 @@ public class Latency { * @return the remaining waiting time in milliseconds. The return value may be negative * which expresses how long the time is over the minimum waiting time. */ - public static int waitingRemainingGuessed(final String hostname, final String hosthash, final RobotsTxt robots, final ClientIdentification.Agent agent) { + public static int waitingRemainingGuessed(final String hostname, final int port, final String hosthash, final RobotsTxt robots, final ClientIdentification.Agent agent) { // first check if the domain was _ever_ accessed before final Host host = map.get(hosthash); @@ -171,7 +171,7 @@ public class Latency { // find the delay as given by robots.txt on target site if (robots != null) { - int robotsDelay = waitingRobots(hostname + ":80", robots, agent, false); + int robotsDelay = waitingRobots(hostname + ":" + port, robots, agent, false); if (robotsDelay < 0) return -timeSinceLastAccess; // no limits if granted exclusively for this peer waiting = Math.max(waiting, robotsDelay); }