Better balancing: if the element at the top of the stack would force busy waiting,

an element from the bottom of the stack is used instead.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3356 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent dda24fcb85
commit a15963ff98

@ -138,38 +138,46 @@ public class plasmaCrawlBalancer {
}
// Diff rendering of get(long minimumDelta): the lines below come from a commit diff
// shown without its +/- markers, so statements of the previous and of the new
// implementation appear interleaved and the braces do not balance as displayed.
// NOTE(review): which line belongs to which revision cannot be decided from this
// view alone; read the comments below as per-line notes, not as one control flow.
public String get(long minimumDelta) throws IOException {
// returns an url-hash from the stack
// returns an url-hash from the stack and ensures minimum delta times
synchronized (domainStacks) {
// refill the stack once if it is empty while domain stacks still hold entries;
// give up with null if the stack is still empty afterwards
if ((stack.size() == 0) && (domainStacks.size() > 0)) flushOnce();
if (stack.size() == 0) return null;
String entry = null;
if (stack.size() > 0) {
entry = new String(stack.pop().getColBytes(0));
} else if (domainStacks.size() > 0) {
flushOnce();
// look at the top entry without removing it
String topentry = new String(stack.top().getColBytes(0));
// check if the time after retrieval of last hash from same
// domain is not shorter than the minimumDelta
// (lastAccessDelta yields Long.MAX_VALUE when no access is recorded for the domain)
long delta = lastAccessDelta(topentry);
if (delta > minimumDelta) {
// the entry from top is fine
entry = new String(stack.pop().getColBytes(0));
} else {
// try entry from bottom
// NOTE(review): pot() presumably removes the bottom element of the stack,
// as the comment above suggests; its implementation is not visible here
entry = new String(stack.pot().getColBytes(0));
delta = lastAccessDelta(entry);
}
if ((minimumDelta > 0) && (entry != null)) {
// check if the time after retrieval of last hash from same
// domain is not shorter than the minimumDelta
// the part of the url-hash starting at index 6 is used as key into domainAccess
String domhash = entry.substring(6);
Long lastAccess = (Long) domainAccess.get(domhash);
if (lastAccess != null) {
// this is not the first access of the same domain
long la = lastAccess.longValue();
if (System.currentTimeMillis() - la < minimumDelta) {
// force a busy waiting here
// in best case, this should never happen if the balancer works properly
// this is only to protect against the worst case, where the crawler could
// behave in a DoS-manner
long sleeptime = minimumDelta - (System.currentTimeMillis() - la);
// NOTE(review): wait() is invoked on 'this' while the only lock visible here is on
// domainStacks; Object.wait throws IllegalMonitorStateException unless the calling
// thread owns the monitor of 'this' -- confirm that callers hold it.
// An InterruptedException is silently ignored.
if (sleeptime > 0) try {this.wait(sleeptime);} catch (InterruptedException e) {}
}
}
// remember the time of this access for the domain
domainAccess.put(domhash, new Long(System.currentTimeMillis()));
if (delta < minimumDelta) {
// force a busy waiting here
// in best case, this should never happen if the balancer works properly
// this is only to protect against the worst case, where the crawler could
// behave in a DoS-manner
// delta < minimumDelta holds here, so sleeptime is positive
long sleeptime = minimumDelta - delta;
// NOTE(review): same monitor concern as above -- wait() on 'this' inside
// synchronized (domainStacks); confirm that the monitor of 'this' is held.
// An InterruptedException is silently ignored.
try {this.wait(sleeptime);} catch (InterruptedException e) {}
}
// remember the time of this access, keyed by the url-hash part starting at index 6
domainAccess.put(entry.substring(6), new Long(System.currentTimeMillis()));
return entry;
}
}
/**
 * Computes how much time has passed since the last access that is recorded
 * in domainAccess for the given url-hash. The lookup key is the part of the
 * hash starting at index 6.
 *
 * @param urlhash the url-hash to look up; must not be null
 * @return the milliseconds elapsed since the recorded access time, or
 *         Long.MAX_VALUE if no access is recorded for the key
 */
private long lastAccessDelta(String urlhash) {
    assert urlhash != null;
    final String domainKey = urlhash.substring(6);
    final Long recorded = (Long) domainAccess.get(domainKey);
    if (recorded != null) {
        // an earlier access is known: report the elapsed time
        return System.currentTimeMillis() - recorded.longValue();
    }
    // never accessed
    return Long.MAX_VALUE;
}
public byte[] top(int dist) throws IOException {
flushSome(1 + dist - stack.size()); // flush only that much as we need to display
synchronized (domainStacks) {

Loading…
Cancel
Save