enhanced crawler

pull/1/head
Michael Peter Christen 13 years ago
parent e101c2e0e2
commit 2fa037ae1d

@ -517,11 +517,13 @@ public class Balancer {
if (!this.domainStacks.isEmpty() && System.currentTimeMillis() - this.lastDomainStackFill < 60000L) return; if (!this.domainStacks.isEmpty() && System.currentTimeMillis() - this.lastDomainStackFill < 60000L) return;
this.domainStacks.clear(); this.domainStacks.clear();
this.lastDomainStackFill = System.currentTimeMillis(); this.lastDomainStackFill = System.currentTimeMillis();
final HandleSet handles = this.urlFileIndex.keysFromBuffer(objectIndexBufferSize / 2); //final HandleSet handles = this.urlFileIndex.keysFromBuffer(objectIndexBufferSize / 2);
final CloneableIterator<byte[]> i = handles.keys(true, null); //final CloneableIterator<byte[]> i = handles.keys(true, null);
final CloneableIterator<byte[]> i = this.urlFileIndex.keys(true, null);
byte[] handle; byte[] handle;
String host; String host;
Request request; Request request;
int count = 0;
while (i.hasNext()) { while (i.hasNext()) {
handle = i.next(); handle = i.next();
final Row.Entry entry = this.urlFileIndex.get(handle, false); final Row.Entry entry = this.urlFileIndex.get(handle, false);
@ -533,6 +535,8 @@ public class Balancer {
} catch (final RowSpaceExceededException e) { } catch (final RowSpaceExceededException e) {
break; break;
} }
count++;
if (this.domainStacks.size() > 0 && count > 120 * this.domainStacks.size()) break;
} }
Log.logInfo("BALANCER", "re-fill of domain stacks; fileIndex.size() = " + this.urlFileIndex.size() + ", domainStacks.size = " + this.domainStacks.size() + ", collection time = " + (System.currentTimeMillis() - this.lastDomainStackFill) + " ms"); Log.logInfo("BALANCER", "re-fill of domain stacks; fileIndex.size() = " + this.urlFileIndex.size() + ", domainStacks.size = " + this.domainStacks.size() + ", collection time = " + (System.currentTimeMillis() - this.lastDomainStackFill) + " ms");
this.domStackInitSize = this.domainStacks.size(); this.domStackInitSize = this.domainStacks.size();

Loading…
Cancel
Save