From 4783a309105823268c7081458ed9c0534c74cada Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 16 Mar 2007 15:16:26 +0000 Subject: [PATCH] - fixed a flush problem in balancer - return to idle divisor in RWI RAM cache flush git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3485 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- .../de/anomic/plasma/plasmaCrawlBalancer.java | 37 ++++++++++++------- source/de/anomic/plasma/plasmaWordIndex.java | 5 ++- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/build.properties b/build.properties index 3d9a2cc7d..61f1ce5a5 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.507 +releaseVersion=0.508 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index a85a93589..d8c11cc69 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -185,9 +185,14 @@ public class plasmaCrawlBalancer { public synchronized int size() { int componentsize = urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks(); try { - if ((kelondroRecords.debugmode) && (componentsize != urlFileIndex.size())) { + if (componentsize != urlFileIndex.size()) { // hier ist urlIndexFile.size() immer größer. warum? 
- serverLog.logWarning("PLASMA BALANCER", "size operation wrong - componentsize = " + componentsize + ", ramIndex.size() = " + urlFileIndex.size()); + if (kelondroRecords.debugmode) { + serverLog.logWarning("PLASMA BALANCER", "size operation wrong in " + stackname + " - componentsize = " + componentsize + ", urlFileIndex.size() = " + urlFileIndex.size()); + } + if (componentsize == 0) { + resetFileIndex(); + } } } catch (IOException e) { e.printStackTrace(); @@ -238,7 +243,7 @@ public class plasmaCrawlBalancer { public synchronized void push(plasmaCrawlEntry entry) throws IOException { assert entry != null; if (urlFileIndex.has(entry.urlhash().getBytes())) { - serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " - fixed"); + serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed"); return; } @@ -410,21 +415,27 @@ public class plasmaCrawlBalancer { } public synchronized plasmaCrawlEntry top(int dist) throws IOException { - int availableInRam = urlRAMStack.size() + sizeDomainStacks(); - if ((availableInRam <= dist) && (urlFileStack.size() > (dist - availableInRam))) { - // flush some entries from disc to domain stacks + // if we need to flush anything, then flush the domain stack first, + // to avoid that new urls get hidden by old entries from the file stack + while ((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) { + // flush only that much as we need to display + flushOnceDomStacks(true); + } + while ((urlRAMStack.size() <= dist) && (urlFileStack.size() > 0)) { + // flush some entries from disc to ram stack try { - for (int i = 0; i <= (dist - availableInRam); i++) { - if (urlFileStack.size() == 0) break; - urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0))); - } - } catch (IOException e) {} + urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0))); + } catch (IOException e) { + break; + } } - while 
((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) flushOnceDomStacks(true); // flush only that much as we need to display if (dist >= urlRAMStack.size()) return null; String urlhash = (String) urlRAMStack.get(dist); kelondroRow.Entry entry = urlFileIndex.get(urlhash.getBytes()); - if (entry == null) return null; + if (entry == null) { + if (kelondroRecords.debugmode) serverLog.logWarning("PLASMA BALANCER", "no entry in index for urlhash " + urlhash); + return null; + } return new plasmaCrawlEntry(entry); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index b0d2ef52b..4d929ab68 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -61,6 +61,7 @@ public final class plasmaWordIndex implements indexRI { // environment constants public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final int wCacheMaxChunk = 1000; // number of references for each urlhash + public static final int lowcachedivisor = 200; public static final int maxCollectionPartition = 7; // should be 7 private final kelondroOrder indexOrder = kelondroBase64Order.enhancedCoder; @@ -201,8 +202,8 @@ public final class plasmaWordIndex implements indexRI { } public void flushCacheSome() { - flushCache(dhtOutCache, flushsize); - flushCache(dhtInCache, flushsize); + flushCache(dhtOutCache, (dhtOutCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtOutCache.size() / lowcachedivisor))); + flushCache(dhtInCache, (dhtInCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtInCache.size() / lowcachedivisor))); } private void flushCache(indexRAMRI ram, int count) {