diff --git a/source/net/yacy/crawler/Balancer.java b/source/net/yacy/crawler/Balancer.java index c649b668e..62421e9ed 100644 --- a/source/net/yacy/crawler/Balancer.java +++ b/source/net/yacy/crawler/Balancer.java @@ -304,9 +304,11 @@ public class Balancer { public Map getDomainStackHosts(RobotsTxt robots) { Map map = new TreeMap(); // we use a tree map to get a stable ordering for (Map.Entry entry: this.domainStacks.entrySet()) { - int size = entry.getValue().handleSet.size(); - int delta = Latency.waitingRemainingGuessed(entry.getKey(), entry.getValue().hosthash, robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta); - map.put(entry.getKey(), new Integer[]{size, delta}); + final String hostname = entry.getKey(); + final HostHandles hosthandles = entry.getValue(); + int size = hosthandles.handleSet.size(); + int delta = Latency.waitingRemainingGuessed(hostname, hosthandles.hosthash, robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta); + map.put(hostname, new Integer[]{size, delta}); } return map; } @@ -429,9 +431,9 @@ public class Balancer { byte[] failhash = null; while (!this.urlFileIndex.isEmpty()) { byte[] nexthash = getbest(robots); + if (nexthash == null) return null; + synchronized (this) { - if (nexthash == null) return null; - Row.Entry rowEntry = (nexthash == null) ? null : this.urlFileIndex.remove(nexthash); if (rowEntry == null) continue; @@ -515,43 +517,43 @@ public class Balancer { int newCandidatesForward = 1; while (i.hasNext() && nextZeroCandidates.size() < 1000) { entry = i.next(); + final String hostname = entry.getKey(); + final HostHandles hosthandles = entry.getValue(); // clean up empty entries - if (entry.getValue().handleSet.isEmpty()) { + if (hosthandles.handleSet.isEmpty()) { i.remove(); continue; } - final byte[] urlhash = entry.getValue().handleSet.getOne(0); + final byte[] urlhash = hosthandles.handleSet.getOne(0); if (urlhash == null) continue; int w; Row.Entry rowEntry; try { rowEntry = this.urlFileIndex.get(urlhash, false); - if (rowEntry == null) { - continue; - } + if (rowEntry == null) continue; // may have been deleted there manwhile Request crawlEntry = new Request(rowEntry); w = Latency.waitingRemaining(crawlEntry.url(), robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta); - //System.out.println("*** waitingRemaining = " + w + ", guessed = " + Latency.waitingRemainingGuessed(entry.getKey(), this.minimumLocalDelta, this.minimumGlobalDelta)); + //System.out.println("*** waitingRemaining = " + w + ", guessed = " + Latency.waitingRemainingGuessed(hostname, this.minimumLocalDelta, this.minimumGlobalDelta)); //System.out.println("*** explained: " + Latency.waitingRemainingExplain(crawlEntry.url(), robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta)); } catch (final IOException e1) { - w = Latency.waitingRemainingGuessed(entry.getKey(), entry.getValue().hosthash, robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta); + w = Latency.waitingRemainingGuessed(hostname, hosthandles.hosthash, robots, this.myAgentIDs, this.minimumLocalDelta, this.minimumGlobalDelta); } if (w <= 0) { if (w == Integer.MIN_VALUE) { if (newCandidatesForward-- > 0) { - nextZeroCandidates.set(new AbstractMap.SimpleEntry(entry.getKey(), urlhash), 10000); + nextZeroCandidates.set(new AbstractMap.SimpleEntry(hostname, urlhash), 10000); } else { - failoverCandidates.set(new AbstractMap.SimpleEntry(entry.getKey(), urlhash), 0); + failoverCandidates.set(new AbstractMap.SimpleEntry(hostname, urlhash), 0); } } else { - nextZeroCandidates.set(new AbstractMap.SimpleEntry(entry.getKey(), urlhash), entry.getValue().handleSet.size()); + nextZeroCandidates.set(new AbstractMap.SimpleEntry(hostname, urlhash), hosthandles.handleSet.size()); } } else { - failoverCandidates.set(new AbstractMap.SimpleEntry(entry.getKey(), urlhash), w); + failoverCandidates.set(new AbstractMap.SimpleEntry(hostname, urlhash), w); } } //Log.logInfo("Balancer", "*** getbest: created new nextZeroCandidates-list, size = " + nextZeroCandidates.size() + ", domainStacks.size = " + this.domainStacks.size()); @@ -576,7 +578,7 @@ public class Balancer { Map.Entry hosthash; while (k.hasNext()) { hosthash = k.next(); - if (failoverCandidates.get(hosthash) > 1000) break; // thats too long; we want a second chance for this! + if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this! besthost = hosthash.getKey(); besturlhash = hosthash.getValue(); removeHashFromDomainStacks(besthost, besturlhash); diff --git a/source/net/yacy/kelondro/index/BufferedObjectIndex.java b/source/net/yacy/kelondro/index/BufferedObjectIndex.java index ec0347eb8..29d11774c 100644 --- a/source/net/yacy/kelondro/index/BufferedObjectIndex.java +++ b/source/net/yacy/kelondro/index/BufferedObjectIndex.java @@ -232,12 +232,18 @@ public class BufferedObjectIndex implements Index, Iterable { @Override public List random(final int count) throws IOException { - final List list = new ArrayList(); + List list0, list1; synchronized (this.backend) { - List list0 = this.buffer.random(count); - list.addAll(list0); - list0 = this.backend.random(count - list.size()); - list.addAll(list0); + list0 = this.buffer.random(Math.max(1, count / 2)); + list1 = this.backend.random(count - list0.size()); + } + // multiplex the lists + final List list = new ArrayList(); + Iterator i0 = list0.iterator(); + Iterator i1 = list1.iterator(); + while (i0.hasNext() || i1.hasNext()) { + if (i0.hasNext()) list.add(i0.next()); + if (i1.hasNext()) list.add(i1.next()); } return list; }