From d50be59088d123db86d04cd7c786da503af6a32c Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 6 Jun 2009 09:34:44 +0000 Subject: [PATCH] - added an automatic re-construction of the domain stack after 10 minutes. This re-construction also adds to the domain stack those URLs that were left over, because of stack size limitations, when the domain stack was created the last time - changed the busy sleep time for the crawl thread to 30 milliseconds. This is sufficient to crawl with 2000 PPM. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6028 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- defaults/yacy.init | 2 +- source/de/anomic/crawler/Balancer.java | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 8f8a50a2b..8ed45d7c9 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -579,7 +579,7 @@ filterOutStopwordsFromTopwords=true 40_peerseedcycle_busysleep=1200000 40_peerseedcycle_memprereq=4194304 50_localcrawl_idlesleep=2000 -50_localcrawl_busysleep=2 +50_localcrawl_busysleep=30 50_localcrawl_memprereq=12582912 50_localcrawl_isPaused=false 60_remotecrawlloader_idlesleep=60000 diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 8b61cc951..57bade44b 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -53,6 +53,7 @@ public class Balancer { private long minimumLocalDelta; private long minimumGlobalDelta; private int profileErrors; + private long lastDomainStackFill; public Balancer(final File cachePath, final String stackname, final boolean fullram, final long minimumLocalDelta, final long minimumGlobalDelta) { @@ -68,6 +69,7 @@ public class Balancer { File f = new File(cacheStacksPath, stackname + indexSuffix); urlFileIndex = new EcoTable(f, CrawlEntry.rowdef, (fullram) ?
EcoTable.tailCacheUsageAuto : EcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0); profileErrors = 0; + lastDomainStackFill = 0; Log.logInfo("Balancer", "opened balancer file with " + urlFileIndex.size() + " entries from " + f.toString()); } @@ -376,7 +378,9 @@ public class Balancer { } private void fillDomainStacks(int maxdomstacksize) throws IOException { - if (this.domainStacks.size() > 0) return; + if (this.domainStacks.size() > 0 && System.currentTimeMillis() - lastDomainStackFill < 600000L) return; + this.domainStacks.clear(); + this.lastDomainStackFill = System.currentTimeMillis(); CloneableIterator i = this.urlFileIndex.keys(true, null); while (i.hasNext()) { pushHashToDomainStacks(new String(i.next()), 50);