Better balancing: if taking the element at the top of the stack would force a busy wait,

an element from the bottom of the stack is used instead.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3356 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent dda24fcb85
commit a15963ff98

@ -138,38 +138,46 @@ public class plasmaCrawlBalancer {
} }
public String get(long minimumDelta) throws IOException { public String get(long minimumDelta) throws IOException {
// returns an url-hash from the stack // returns an url-hash from the stack and ensures minimum delta times
synchronized (domainStacks) { synchronized (domainStacks) {
if ((stack.size() == 0) && (domainStacks.size() > 0)) flushOnce();
if (stack.size() == 0) return null;
String entry = null; String entry = null;
if (stack.size() > 0) { String topentry = new String(stack.top().getColBytes(0));
entry = new String(stack.pop().getColBytes(0));
} else if (domainStacks.size() > 0) { // check if the time after retrieval of last hash from same
flushOnce(); // domain is not shorter than the minimumDelta
long delta = lastAccessDelta(topentry);
if (delta > minimumDelta) {
// the entry from top is fine
entry = new String(stack.pop().getColBytes(0)); entry = new String(stack.pop().getColBytes(0));
} else {
// try entry from bottom
entry = new String(stack.pot().getColBytes(0));
delta = lastAccessDelta(entry);
} }
if ((minimumDelta > 0) && (entry != null)) {
// check if the time after retrieval of last hash from same if (delta < minimumDelta) {
// domain is not shorter than the minimumDelta // force a busy waiting here
String domhash = entry.substring(6); // in best case, this should never happen if the balancer works propertly
Long lastAccess = (Long) domainAccess.get(domhash); // this is only to protect against the worst case, where the crawler could
if (lastAccess != null) { // behave in a DoS-manner
// this is not the first access of the same domain long sleeptime = minimumDelta - delta;
long la = lastAccess.longValue(); try {this.wait(sleeptime);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - la < minimumDelta) {
// force a busy waiting here
// in best case, this should never happen if the balancer works propertly
// this is only to protect against the worst case, where the crawler could
// behave in a DoS-manner
long sleeptime = minimumDelta - (System.currentTimeMillis() - la);
if (sleeptime > 0) try {this.wait(sleeptime);} catch (InterruptedException e) {}
}
}
domainAccess.put(domhash, new Long(System.currentTimeMillis()));
} }
domainAccess.put(entry.substring(6), new Long(System.currentTimeMillis()));
return entry; return entry;
} }
} }
/**
 * Computes the time in milliseconds that has passed since the last recorded
 * access for the given url-hash. The lookup key in domainAccess is the part
 * of the hash starting at character index 6.
 *
 * @param urlhash the url-hash to look up; must not be null
 * @return the difference between the current time and the recorded access
 *         time, or Long.MAX_VALUE if no access is recorded for that key
 */
private long lastAccessDelta(String urlhash) {
    assert urlhash != null;
    final String domhash = urlhash.substring(6);
    final Long previous = (Long) domainAccess.get(domhash);
    // no recorded access: report the largest possible delta
    return (previous == null)
            ? Long.MAX_VALUE
            : System.currentTimeMillis() - previous.longValue();
}
public byte[] top(int dist) throws IOException { public byte[] top(int dist) throws IOException {
flushSome(1 + dist - stack.size()); // flush only that much as we need to display flushSome(1 + dist - stack.size()); // flush only that much as we need to display
synchronized (domainStacks) { synchronized (domainStacks) {

Loading…
Cancel
Save