- fixed a flush problem in balancer

- return to idle divisor in RWI RAM cache flush

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3485 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 91c2a042a7
commit 4783a30910

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.507
releaseVersion=0.508
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -185,9 +185,14 @@ public class plasmaCrawlBalancer {
public synchronized int size() {
int componentsize = urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks();
try {
if ((kelondroRecords.debugmode) && (componentsize != urlFileIndex.size())) {
if (componentsize != urlFileIndex.size()) {
// hier ist urlIndexFile.size() immer größer. warum?
serverLog.logWarning("PLASMA BALANCER", "size operation wrong - componentsize = " + componentsize + ", ramIndex.size() = " + urlFileIndex.size());
if (kelondroRecords.debugmode) {
serverLog.logWarning("PLASMA BALANCER", "size operation wrong in " + stackname + " - componentsize = " + componentsize + ", urlFileIndex.size() = " + urlFileIndex.size());
}
if (componentsize == 0) {
resetFileIndex();
}
}
} catch (IOException e) {
e.printStackTrace();
@ -238,7 +243,7 @@ public class plasmaCrawlBalancer {
public synchronized void push(plasmaCrawlEntry entry) throws IOException {
assert entry != null;
if (urlFileIndex.has(entry.urlhash().getBytes())) {
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " - fixed");
serverLog.logWarning("PLASMA BALANCER", "double-check has failed for urlhash " + entry.urlhash() + " in " + stackname + " - fixed");
return;
}
@ -410,21 +415,27 @@ public class plasmaCrawlBalancer {
}
public synchronized plasmaCrawlEntry top(int dist) throws IOException {
int availableInRam = urlRAMStack.size() + sizeDomainStacks();
if ((availableInRam <= dist) && (urlFileStack.size() > (dist - availableInRam))) {
// flush some entries from disc to domain stacks
// if we need to flush anything, then flush the domain stack first,
// to avoid that new urls get hidden by old entries from the file stack
while ((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) {
// flush only that much as we need to display
flushOnceDomStacks(true);
}
while ((urlRAMStack.size() <= dist) && (urlFileStack.size() > 0)) {
// flush some entries from disc to ram stack
try {
for (int i = 0; i <= (dist - availableInRam); i++) {
if (urlFileStack.size() == 0) break;
urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0)));
}
} catch (IOException e) {}
urlRAMStack.add(new String(urlFileStack.pop().getColBytes(0)));
} catch (IOException e) {
break;
}
}
while ((sizeDomainStacks() > 0) && (urlRAMStack.size() <= dist)) flushOnceDomStacks(true); // flush only that much as we need to display
if (dist >= urlRAMStack.size()) return null;
String urlhash = (String) urlRAMStack.get(dist);
kelondroRow.Entry entry = urlFileIndex.get(urlhash.getBytes());
if (entry == null) return null;
if (entry == null) {
if (kelondroRecords.debugmode) serverLog.logWarning("PLASMA BALANCER", "no entry in index for urlhash " + urlhash);
return null;
}
return new plasmaCrawlEntry(entry);
}

@ -61,6 +61,7 @@ public final class plasmaWordIndex implements indexRI {
// environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 1000; // number of references for each urlhash
public static final int lowcachedivisor = 200;
public static final int maxCollectionPartition = 7; // should be 7
private final kelondroOrder indexOrder = kelondroBase64Order.enhancedCoder;
@ -201,8 +202,8 @@ public final class plasmaWordIndex implements indexRI {
}
public void flushCacheSome() {
flushCache(dhtOutCache, flushsize);
flushCache(dhtInCache, flushsize);
flushCache(dhtOutCache, (dhtOutCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtOutCache.size() / lowcachedivisor)));
flushCache(dhtInCache, (dhtInCache.size() > 3 * flushsize) ? flushsize : Math.min(flushsize, Math.max(1, dhtInCache.size() / lowcachedivisor)));
}
private void flushCache(indexRAMRI ram, int count) {

Loading…
Cancel
Save