From a15963ff9859989bfce6c081504a077d77b6fe5a Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 9 Feb 2007 10:32:58 +0000 Subject: [PATCH] better balancing: if element from top would force a busy waiting, an element from the bottom of the stack is used instead. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3356 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaCrawlBalancer.java | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index aa96ba4d1..2dc71a7d8 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -138,38 +138,46 @@ public class plasmaCrawlBalancer { } public String get(long minimumDelta) throws IOException { - // returns an url-hash from the stack + // returns an url-hash from the stack and ensures minimum delta times synchronized (domainStacks) { + if ((stack.size() == 0) && (domainStacks.size() > 0)) flushOnce(); + if (stack.size() == 0) return null; + String entry = null; - if (stack.size() > 0) { - entry = new String(stack.pop().getColBytes(0)); - } else if (domainStacks.size() > 0) { - flushOnce(); + String topentry = new String(stack.top().getColBytes(0)); + + // check if the time after retrieval of last hash from same + // domain is not shorter than the minimumDelta + long delta = lastAccessDelta(topentry); + if (delta > minimumDelta) { + // the entry from top is fine entry = new String(stack.pop().getColBytes(0)); + } else { + // try entry from bottom + entry = new String(stack.pot().getColBytes(0)); + delta = lastAccessDelta(entry); } - if ((minimumDelta > 0) && (entry != null)) { - // check if the time after retrieval of last hash from same - // domain is not shorter than the minimumDelta - String domhash = entry.substring(6); - Long lastAccess = (Long) domainAccess.get(domhash); - if (lastAccess != null) { - // 
this is not the first access of the same domain - long la = lastAccess.longValue(); - if (System.currentTimeMillis() - la < minimumDelta) { - // force a busy waiting here - // in best case, this should never happen if the balancer works propertly - // this is only to protect against the worst case, where the crawler could - // behave in a DoS-manner - long sleeptime = minimumDelta - (System.currentTimeMillis() - la); - if (sleeptime > 0) try {this.wait(sleeptime);} catch (InterruptedException e) {} - } - } - domainAccess.put(domhash, new Long(System.currentTimeMillis())); + + if (delta < minimumDelta) { + // force a busy waiting here + // in best case, this should never happen if the balancer works properly + // this is only to protect against the worst case, where the crawler could + // behave in a DoS-manner + long sleeptime = minimumDelta - delta; + try {this.wait(sleeptime);} catch (InterruptedException e) {} } + domainAccess.put(entry.substring(6), new Long(System.currentTimeMillis())); return entry; } } + private long lastAccessDelta(String urlhash) { + assert urlhash != null; + Long lastAccess = (Long) domainAccess.get(urlhash.substring(6)); + if (lastAccess == null) return Long.MAX_VALUE; // never accessed + return System.currentTimeMillis() - lastAccess.longValue(); + } + public byte[] top(int dist) throws IOException { flushSome(1 + dist - stack.size()); // flush only that much as we need to display synchronized (domainStacks) {