- added an automatic re-construction of the domain stack after 10 minutes. The re-construction then also adds to the domain stack those urls that were left over because of stack size limitations when the domain stack was last created

- changed the busy sleep time for the crawl thread to 30 milliseconds. This is sufficient to crawl with 2000 PPM.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6028 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 5fdba0fa51
commit d50be59088

@ -579,7 +579,7 @@ filterOutStopwordsFromTopwords=true
40_peerseedcycle_busysleep=1200000 40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=4194304 40_peerseedcycle_memprereq=4194304
50_localcrawl_idlesleep=2000 50_localcrawl_idlesleep=2000
50_localcrawl_busysleep=2 50_localcrawl_busysleep=30
50_localcrawl_memprereq=12582912 50_localcrawl_memprereq=12582912
50_localcrawl_isPaused=false 50_localcrawl_isPaused=false
60_remotecrawlloader_idlesleep=60000 60_remotecrawlloader_idlesleep=60000

@ -53,6 +53,7 @@ public class Balancer {
private long minimumLocalDelta; private long minimumLocalDelta;
private long minimumGlobalDelta; private long minimumGlobalDelta;
private int profileErrors; private int profileErrors;
private long lastDomainStackFill;
public Balancer(final File cachePath, final String stackname, final boolean fullram, public Balancer(final File cachePath, final String stackname, final boolean fullram,
final long minimumLocalDelta, final long minimumGlobalDelta) { final long minimumLocalDelta, final long minimumGlobalDelta) {
@ -68,6 +69,7 @@ public class Balancer {
File f = new File(cacheStacksPath, stackname + indexSuffix); File f = new File(cacheStacksPath, stackname + indexSuffix);
urlFileIndex = new EcoTable(f, CrawlEntry.rowdef, (fullram) ? EcoTable.tailCacheUsageAuto : EcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0); urlFileIndex = new EcoTable(f, CrawlEntry.rowdef, (fullram) ? EcoTable.tailCacheUsageAuto : EcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
profileErrors = 0; profileErrors = 0;
lastDomainStackFill = 0;
Log.logInfo("Balancer", "opened balancer file with " + urlFileIndex.size() + " entries from " + f.toString()); Log.logInfo("Balancer", "opened balancer file with " + urlFileIndex.size() + " entries from " + f.toString());
} }
@ -376,7 +378,9 @@ public class Balancer {
} }
private void fillDomainStacks(int maxdomstacksize) throws IOException { private void fillDomainStacks(int maxdomstacksize) throws IOException {
if (this.domainStacks.size() > 0) return; if (this.domainStacks.size() > 0 && System.currentTimeMillis() - lastDomainStackFill < 600000L) return;
this.domainStacks.clear();
this.lastDomainStackFill = System.currentTimeMillis();
CloneableIterator<byte[]> i = this.urlFileIndex.keys(true, null); CloneableIterator<byte[]> i = this.urlFileIndex.keys(true, null);
while (i.hasNext()) { while (i.hasNext()) {
pushHashToDomainStacks(new String(i.next()), 50); pushHashToDomainStacks(new String(i.next()), 50);

Loading…
Cancel
Save