better index cell merge logic

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5754 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 832fef670f
commit 0f0b4aec75

@ -48,44 +48,78 @@ import de.anomic.kelondro.text.ReferenceContainerCache.blobFileEntries;
* of merging with a call to the start() - method. To shut down all mergings, call terminate() * of merging with a call to the start() - method. To shut down all mergings, call terminate()
* only once. * only once.
*/ */
public class ReferenceContainerMerger extends Thread { public class IODispatcher extends Thread {
private Job poison; private final Boolean poison, vita;
private ArrayBlockingQueue<Job> queue; private ArrayBlockingQueue<Boolean> controlQueue;
private ArrayBlockingQueue<Job> termi; private ArrayBlockingQueue<MergeJob> mergeQueue;
private ArrayBlockingQueue<DumpJob> dumpQueue;
private ArrayBlockingQueue<Boolean> termQueue;
public ReferenceContainerMerger(int queueLength) { public IODispatcher(int dumpQueueLength, int mergeQueueLength) {
this.poison = new Job(); this.poison = new Boolean(false);
this.queue = new ArrayBlockingQueue<Job>(queueLength); this.vita = new Boolean(true);
this.termi = new ArrayBlockingQueue<Job>(1); this.controlQueue = new ArrayBlockingQueue<Boolean>(dumpQueueLength + mergeQueueLength + 1);
this.dumpQueue = new ArrayBlockingQueue<DumpJob>(dumpQueueLength);
this.mergeQueue = new ArrayBlockingQueue<MergeJob>(mergeQueueLength);
this.termQueue = new ArrayBlockingQueue<Boolean>(1);
} }
public synchronized void terminate() { public synchronized void terminate() {
if (queue == null || !this.isAlive()) return; if (termQueue != null && this.isAlive()) {
try { try {
queue.put(poison); controlQueue.put(poison);
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
}
// await termination
try {
termQueue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
} }
// await termination }
try {
termi.take(); public synchronized void dump(ReferenceContainerCache cache, File file, ReferenceContainerArray array) {
} catch (InterruptedException e) { if (dumpQueue == null || !this.isAlive()) {
e.printStackTrace(); try {
cache.dump(file, true);
} catch (IOException e) {
e.printStackTrace();
}
} else {
DumpJob job = new DumpJob(cache, file, array);
try {
dumpQueue.put(job);
controlQueue.put(vita);
} catch (InterruptedException e) {
e.printStackTrace();
try {
cache.dump(file, true);
} catch (IOException ee) {
e.printStackTrace();
}
}
} }
} }
public synchronized int queueLength() {
return controlQueue.size();
}
public synchronized void merge(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) { public synchronized void merge(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) {
if (queue == null || !this.isAlive()) { if (mergeQueue == null || !this.isAlive()) {
try { try {
mergeMount(f1, f2, array, payloadrow, newFile); mergeMount(f1, f2, array, payloadrow, newFile);
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} else { } else {
Job job = new Job(f1, f2, array, payloadrow, newFile); MergeJob job = new MergeJob(f1, f2, array, payloadrow, newFile);
try { try {
queue.put(job); mergeQueue.put(job);
controlQueue.put(vita);
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
try { try {
@ -98,37 +132,61 @@ public class ReferenceContainerMerger extends Thread {
} }
public void run() { public void run() {
Job job; MergeJob mergeJob;
DumpJob dumpJob;
try { try {
while ((job = queue.take()) != poison) { loop: while (controlQueue.take() != poison) {
job.merge(); // prefer dump actions to flush memory to disc
if (dumpQueue.size() > 0) {
dumpJob = dumpQueue.take();
dumpJob.dump();
continue loop;
}
// otherwise do a merge operation
if (mergeQueue.size() > 0) {
mergeJob = mergeQueue.take();
mergeJob.merge();
continue loop;
}
assert false; // this should never happen
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} finally { } finally {
try { try {
termi.put(poison); termQueue.put(poison);
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
} }
public class Job { public class DumpJob {
ReferenceContainerCache cache;
File file;
ReferenceContainerArray array;
public DumpJob(ReferenceContainerCache cache, File file, ReferenceContainerArray array) {
this.cache = cache;
this.file = file;
this.array = array;
}
public void dump() {
try {
cache.dump(file, true);
array.mountBLOBContainer(file);
} catch (IOException e) {
e.printStackTrace();
}
}
}
public class MergeJob {
File f1, f2, newFile; File f1, f2, newFile;
BLOBArray array; BLOBArray array;
Row payloadrow; Row payloadrow;
public Job() { public MergeJob(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) {
this.f1 = null;
this.f2 = null;
this.newFile = null;
this.array = null;
this.payloadrow = null;
}
public Job(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) {
this.f1 = f1; this.f1 = f1;
this.f2 = f2; this.f2 = f2;
this.newFile = newFile; this.newFile = newFile;

@ -51,10 +51,15 @@ import de.anomic.server.serverProfiling;
public final class IndexCell extends AbstractBufferedIndex implements BufferedIndex { public final class IndexCell extends AbstractBufferedIndex implements BufferedIndex {
private static final long cleanupCycle = 10000;
// class variables // class variables
private ReferenceContainerArray array; private final ReferenceContainerArray array;
private ReferenceContainerCache ram; private ReferenceContainerCache ram;
private int maxRamEntries, maxArrayFiles; private int maxRamEntries, maxArrayFiles;
private final IODispatcher merger;
private final long lastCleanup;
public IndexCell( public IndexCell(
final File cellPath, final File cellPath,
@ -62,13 +67,15 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
final Row payloadrow, final Row payloadrow,
final int maxRamEntries, final int maxRamEntries,
final int maxArrayFiles, final int maxArrayFiles,
ReferenceContainerMerger merger IODispatcher merger
) throws IOException { ) throws IOException {
this.array = new ReferenceContainerArray(cellPath, wordOrder, payloadrow, merger); this.array = new ReferenceContainerArray(cellPath, wordOrder, payloadrow, merger);
this.ram = new ReferenceContainerCache(payloadrow, wordOrder); this.ram = new ReferenceContainerCache(payloadrow, wordOrder);
this.ram.initWriteMode(); this.ram.initWriteMode();
this.maxRamEntries = maxRamEntries; this.maxRamEntries = maxRamEntries;
this.maxArrayFiles = maxArrayFiles; this.maxArrayFiles = maxArrayFiles;
this.merger = merger;
this.lastCleanup = System.currentTimeMillis();
} }
@ -85,12 +92,14 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
this.ram.add(newEntries); this.ram.add(newEntries);
serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true); serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
if (this.ram.size() > this.maxRamEntries) cacheDump(); if (this.ram.size() > this.maxRamEntries) cacheDump();
cacheCleanup();
} }
public synchronized void add(String hash, ReferenceRow entry) throws IOException { public synchronized void add(String hash, ReferenceRow entry) throws IOException {
this.ram.add(hash, entry); this.ram.add(hash, entry);
serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true); serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
if (this.ram.size() > this.maxRamEntries) cacheDump(); if (this.ram.size() > this.maxRamEntries) cacheDump();
cacheCleanup();
} }
/** /**
@ -262,15 +271,19 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
private synchronized void cacheDump() throws IOException { private synchronized void cacheDump() throws IOException {
// dump the ram // dump the ram
File dumpFile = this.array.newContainerBLOBFile(); File dumpFile = this.array.newContainerBLOBFile();
this.ram.dump(dumpFile, true); //this.ram.dump(dumpFile, true);
//this.array.mountBLOBContainer(dumpFile);
merger.dump(this.ram, dumpFile, array);
// get a fresh ram cache // get a fresh ram cache
this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering()); this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering());
this.ram.initWriteMode(); this.ram.initWriteMode();
// add the dumped indexContainerBLOB to the array }
this.array.mountBLOBContainer(dumpFile);
private synchronized void cacheCleanup() throws IOException {
if (this.lastCleanup + cleanupCycle > System.currentTimeMillis()) return;
int c = 0; int c = 0;
while (this.array.entries() > this.maxArrayFiles && c++ < 3) { if (this.array.entries() > this.maxArrayFiles && c++ < 3) {
if (!this.array.merge(true)) break; this.array.shrink(true);
} }
} }

@ -41,7 +41,7 @@ public final class ReferenceContainerArray {
private final Row payloadrow; private final Row payloadrow;
private final BLOBArray array; private final BLOBArray array;
private final ReferenceContainerMerger merger; private final IODispatcher merger;
/** /**
* open a index container based on a BLOB dump. The content of the BLOB will not be read * open a index container based on a BLOB dump. The content of the BLOB will not be read
@ -57,7 +57,7 @@ public final class ReferenceContainerArray {
final File heapLocation, final File heapLocation,
final ByteOrder wordOrder, final ByteOrder wordOrder,
final Row payloadrow, final Row payloadrow,
ReferenceContainerMerger merger) throws IOException { IODispatcher merger) throws IOException {
this.payloadrow = payloadrow; this.payloadrow = payloadrow;
this.array = new BLOBArray( this.array = new BLOBArray(
heapLocation, heapLocation,
@ -243,8 +243,9 @@ public final class ReferenceContainerArray {
return this.array.entries(); return this.array.entries();
} }
public synchronized boolean merge(boolean similar) throws IOException { public synchronized boolean shrink(boolean similar) throws IOException {
if (this.array.entries() < 2) return false; if (this.array.entries() < 2) return false;
if (this.merger.queueLength() > 0) return false;
File f1 = this.array.unmountOldestBLOB(similar); File f1 = this.array.unmountOldestBLOB(similar);
File f2 = (similar) ? this.array.unmountSimilarSizeBLOB(f1.length()) : this.array.unmountOldestBLOB(false); File f2 = (similar) ? this.array.unmountSimilarSizeBLOB(f1.length()) : this.array.unmountOldestBLOB(false);
merger.merge(f1, f2, this.array, this.payloadrow, newContainerBLOBFile()); merger.merge(f1, f2, this.array, this.payloadrow, newContainerBLOBFile());

@ -47,7 +47,7 @@ import de.anomic.kelondro.text.BufferedIndexCollection;
import de.anomic.kelondro.text.IndexCell; import de.anomic.kelondro.text.IndexCell;
import de.anomic.kelondro.text.MetadataRowContainer; import de.anomic.kelondro.text.MetadataRowContainer;
import de.anomic.kelondro.text.ReferenceContainer; import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceContainerMerger; import de.anomic.kelondro.text.IODispatcher;
import de.anomic.kelondro.text.ReferenceRow; import de.anomic.kelondro.text.ReferenceRow;
import de.anomic.kelondro.text.MetadataRepository; import de.anomic.kelondro.text.MetadataRepository;
import de.anomic.kelondro.text.Word; import de.anomic.kelondro.text.Word;
@ -97,7 +97,7 @@ public final class plasmaWordIndex {
public CrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile; public CrawlProfile.entry defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile; public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
private final File queuesRoot; private final File queuesRoot;
private ReferenceContainerMerger merger; private IODispatcher merger;
public plasmaWordIndex( public plasmaWordIndex(
final String networkName, final String networkName,
@ -132,7 +132,7 @@ public final class plasmaWordIndex {
} }
} }
} }
this.merger = (useCell) ? new ReferenceContainerMerger(1) : null; this.merger = (useCell) ? new IODispatcher(1, 1) : null;
if (this.merger != null) this.merger.start(); if (this.merger != null) this.merger.start();
this.index = (useCell) ? this.index = (useCell) ?
new IndexCell( new IndexCell(

Loading…
Cancel
Save