special strategy for balancer: do not remove targets with zero wait time from the queue
pull/1/head
Michael Peter Christen 11 years ago
parent 9c6228d948
commit 8b32dd5f9e

@ -257,14 +257,18 @@ public class HostBalancer implements Balancer {
while (i.hasNext()) { while (i.hasNext()) {
String s = i.next(); String s = i.next();
HostQueue hq = this.queues.get(s); HostQueue hq = this.queues.get(s);
if (hq == null || hq.size() != 1) {i.remove();} if (hq == null) {i.remove(); continue;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (hq.size() != 1 && delta > 10) {i.remove();}
} }
} else if (smallStacksExist) { } else if (smallStacksExist) {
Iterator<String> i = this.roundRobinHostHashes.iterator(); Iterator<String> i = this.roundRobinHostHashes.iterator();
while (i.hasNext()) { while (i.hasNext()) {
String s = i.next(); String s = i.next();
HostQueue hq = this.queues.get(s); HostQueue hq = this.queues.get(s);
if (hq == null || hq.size() > 10) {i.remove();} if (hq == null) {i.remove(); continue;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (hq.size() > 10 && delta > 10) {i.remove();}
} }
} }
} }
@ -280,8 +284,8 @@ public class HostBalancer implements Balancer {
nhhi.remove(); nhhi.remove();
continue nosleep; continue nosleep;
} }
int delta = Latency.waitingRemainingGuessed(rhq.getHost(), DigestURL.hosthash(rhq.getHost(), rhq.getPort()), robots, ClientIdentification.yacyInternetCrawlerAgent); int delta = Latency.waitingRemainingGuessed(rhq.getHost(), rhh, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (delta <= 10 || this.roundRobinHostHashes.size() == 1) { if (delta <= 10 || this.roundRobinHostHashes.size() == 1 || rhq.size() == 1) {
nhhi.remove(); nhhi.remove();
break nosleep; break nosleep;
} }
@ -289,6 +293,8 @@ public class HostBalancer implements Balancer {
if (rhq == null) { if (rhq == null) {
// second strategy: take from the largest stack and clean round robin cache // second strategy: take from the largest stack and clean round robin cache
// if we would not clear the round robin cache afterwards
// then all targets would be accessed equally which makes this strategy useless
int largest = Integer.MIN_VALUE; int largest = Integer.MIN_VALUE;
for (String h: this.roundRobinHostHashes) { for (String h: this.roundRobinHostHashes) {
HostQueue hq = this.queues.get(h); HostQueue hq = this.queues.get(h);

@ -414,7 +414,7 @@ public class HostQueue implements Balancer {
// if not: return null. A calling method must handle the null value and try again // if not: return null. A calling method must handle the null value and try again
profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle())); profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle()));
if (profileEntry == null) { if (profileEntry == null) {
ConcurrentLog.warn("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); ConcurrentLog.fine("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
continue mainloop; continue mainloop;
} }

@ -387,7 +387,7 @@ public class LegacyBalancer implements Balancer {
// if not: return null. A calling method must handle the null value and try again // if not: return null. A calling method must handle the null value and try again
profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle())); profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle()));
if (profileEntry == null) { if (profileEntry == null) {
ConcurrentLog.warn("Balancer", "no profile entry for handle " + crawlEntry.profileHandle()); ConcurrentLog.fine("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
continue; continue;
} }
// depending on the caching policy we need sleep time to avoid DoS-like situations // depending on the caching policy we need sleep time to avoid DoS-like situations

Loading…
Cancel
Save