- fixed a flush problem in balancer

- return to idle divisor in RWI RAM cache flush

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3485 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 91c2a042a7
commit 4783a30910

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4 javacTarget=1.4
# Release Configuration # Release Configuration
releaseVersion=0.507 releaseVersion=0.508
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -185,9 +185,14 @@ public class plasmaCrawlBalancer {
public synchronized int size() { public synchronized int size() {
int componentsize = urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks(); int componentsize = urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks();
try { try {
if ((kelondroRecords.debugmode) && (componentsize != urlFileIndex.size())) { if (componentsize != urlFileIndex.size()) {
// hier ist urlIndexFile.size() immer größer. warum? // hier ist urlIndexFile.size() immer größer. warum?
serverLog.logWarning("PLASMA BALANCER", "size operation wrong - componentsize = " + componentsize + ", ramIndex.size() = " + urlFileIndex.size()); if (kelondroRecords.debugmode) {
serverLog.logWarning("PLASMA BALANCER", "size operation wrong in " + stackname + " - componentsize = " + componentsize + ", urlFileIndex.size() = " + urlFileIndex.size());
}
if (componentsize == 0) {
resetFileIndex();
}
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
@ -238,7 +243,7 @@ public class plasmaCrawlBalancer {
public synchronized void push(plasmaCrawlEntry entry) throws IOException { public synchronized void push(plasmaCrawlEntry entry) throws IOException {
assert entry != null; assert entry != null;
if (urlFileIndex.has(entry.urlhash().getBytes())) { if (urlFileIndex.has(entry.urlhash().getBytes())) {
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " - fixed"); serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed");
return; return;
} }
@ -410,21 +415,27 @@ public class plasmaCrawlBalancer {
} }
public synchronized plasmaCrawlEntry top(int dist) throws IOException { public synchronized plasmaCrawlEntry top(int dist) throws IOException {
int availableInRam = urlRAMStack.size() + sizeDomainStacks(); // if we need to flush anything, then flush the domain stack first,
if ((availableInRam <= dist) && (urlFileStack.size() > (dist - availableInRam))) { // to avoid that new urls get hidden by old entries from the file stack
// flush some entries from disc to domain stacks while ((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) {
// flush only that much as we need to display
flushOnceDomStacks(true);
}
while ((urlRAMStack.size() <= dist) && (urlFileStack.size() > 0)) {
// flush some entries from disc to ram stack
try { try {
for (int i = 0; i <= (dist - availableInRam); i++) { urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0)));
if (urlFileStack.size() == 0) break; } catch (IOException e) {
urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0))); break;
} }
} catch (IOException e) {}
} }
while ((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) flushOnceDomStacks(true); // flush only that much as we need to display
if (dist >= urlRAMStack.size()) return null; if (dist >= urlRAMStack.size()) return null;
String urlhash = (String) urlRAMStack.get(dist); String urlhash = (String) urlRAMStack.get(dist);
kelondroRow.Entry entry = urlFileIndex.get(urlhash.getBytes()); kelondroRow.Entry entry = urlFileIndex.get(urlhash.getBytes());
if (entry == null) return null; if (entry == null) {
if (kelondroRecords.debugmode) serverLog.logWarning("PLASMA BALANCER", "no entry in index for urlhash " + urlhash);
return null;
}
return new plasmaCrawlEntry(entry); return new plasmaCrawlEntry(entry);
} }

@ -61,6 +61,7 @@ public final class plasmaWordIndex implements indexRI {
// environment constants // environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 1000; // number of references for each urlhash public static final int wCacheMaxChunk = 1000; // number of references for each urlhash
public static final int lowcachedivisor = 200;
public static final int maxCollectionPartition = 7; // should be 7 public static final int maxCollectionPartition = 7; // should be 7
private final kelondroOrder indexOrder = kelondroBase64Order.enhancedCoder; private final kelondroOrder indexOrder = kelondroBase64Order.enhancedCoder;
@ -201,8 +202,8 @@ public final class plasmaWordIndex implements indexRI {
} }
public void flushCacheSome() { public void flushCacheSome() {
flushCache(dhtOutCache, flushsize); flushCache(dhtOutCache, (dhtOutCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtOutCache.size() / lowcachedivisor)));
flushCache(dhtInCache, flushsize); flushCache(dhtInCache, (dhtInCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtInCache.size() / lowcachedivisor)));
} }
private void flushCache(indexRAMRI ram, int count) { private void flushCache(indexRAMRI ram, int count) {

Loading…
Cancel
Save