added sixcooler's hack with some modifications:

http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004
Old index blobs from which entries have been deleted because of DHT transmission should be melted down into new blobs. This uses sixcooler's method from the forum thread, but modifies the process so that the blobs are not merged with themselves but simply rewritten into smaller files.
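
As the diff below shows, the rewrite reuses the existing merge path: passing null as the second file to IODispatcher.merge() / ArrayStack.mergeMount() makes the first blob get rewritten into a new, smaller file instead of being merged with a second one. A minimal sketch of the new cleanup pass (names taken from the ReferenceContainerArray hunk below; the enclosing class and its merger, array, factory and payloadrow fields as well as the newContainerBLOBFile() helper are assumed as in that class):

    // run only while the merger queue is idle, so regular merge jobs keep priority
    while (this.merger.queueLength() < 1) {
        // unmountOldest() takes the oldest blob out of the stack, or returns
        // null if no blob is older than the configured file age limit
        File ff = this.array.unmountOldest();
        if (ff == null) break;
        // second file == null: the blob is only rewritten into a new, smaller
        // file (space freed by earlier deletions is dropped), not merged
        this.merger.merge(ff, null, this.factory, this.array, this.payloadrow, newContainerBLOBFile());
    }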

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6548 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent fbd24c2d84
commit 83d05e9176

@@ -5,8 +5,8 @@
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
@@ -270,6 +270,14 @@ public class ArrayStack implements BLOB {
unmountBLOB(bestMatch[0], false);
return bestMatch;
}
public synchronized File unmountOldest() {
if (this.blobs.size() == 0) return null;
if (System.currentTimeMillis() - this.blobs.get(0).creation.getTime() < this.fileAgeLimit) return null;
File f = this.blobs.get(0).location;
unmountBLOB(f, false);
return f;
}
public synchronized File[] unmountSmallest(long maxResultSize) {
if (this.blobs.size() < 2) return null;
@@ -731,23 +739,52 @@ public class ArrayStack implements BLOB {
blobs = null;
}
/**
* merge two blob files into one. If the second file is given as null,
* then the first file is only rewritten into a new one.
* @param f1
* @param f2 (may also be null)
* @param factory
* @param payloadrow
* @param newFile
* @param writeBuffer
* @return the target file where the given files are merged in
*/
public File mergeMount(File f1, File f2,
ReferenceFactory<? extends Reference> factory,
Row payloadrow, File newFile, int writeBuffer) {
Log.logInfo("BLOBArray", "merging " + f1.getName() + " with " + f2.getName());
File resultFile = mergeWorker(factory, this.keylength, this.ordering, f1, f2, payloadrow, newFile, writeBuffer);
if (resultFile == null) {
Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " returned null. newFile = " + newFile);
return null;
}
try {
mountBLOB(resultFile, false);
} catch (IOException e) {
Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " successfull, but read failed. resultFile = " + resultFile);
return null;
if (f2 == null) {
// this is a rewrite
Log.logInfo("BLOBArray", "rewrite of " + f1.getName());
File resultFile = rewriteWorker(factory, this.keylength, this.ordering, f1, payloadrow, newFile, writeBuffer);
if (resultFile == null) {
Log.logWarning("BLOBArray", "rewrite of file " + f1 + " returned null. newFile = " + newFile);
return null;
}
try {
mountBLOB(resultFile, false);
} catch (IOException e) {
Log.logWarning("BLOBArray", "rewrite of file " + f1 + " successfull, but read failed. resultFile = " + resultFile);
return null;
}
Log.logInfo("BLOBArray", "rewrite of " + f1.getName() + " into " + resultFile);
return resultFile;
} else {
Log.logInfo("BLOBArray", "merging " + f1.getName() + " with " + f2.getName());
File resultFile = mergeWorker(factory, this.keylength, this.ordering, f1, f2, payloadrow, newFile, writeBuffer);
if (resultFile == null) {
Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " returned null. newFile = " + newFile);
return null;
}
try {
mountBLOB(resultFile, false);
} catch (IOException e) {
Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " successfull, but read failed. resultFile = " + resultFile);
return null;
}
Log.logInfo("BLOBArray", "merged " + f1.getName() + " with " + f2.getName() + " into " + resultFile);
return resultFile;
}
Log.logInfo("BLOBArray", "merged " + f1.getName() + " with " + f2.getName() + " into " + resultFile);
return resultFile;
}
private static <ReferenceType extends Reference> File mergeWorker(
@@ -806,6 +843,44 @@ public class ArrayStack implements BLOB {
return newFile;
}
private static <ReferenceType extends Reference> File rewriteWorker(
ReferenceFactory<ReferenceType> factory,
int keylength, ByteOrder order, File f, Row payloadrow, File newFile, int writeBuffer) {
// iterate the single input file and write its content into a new one
CloneableIterator<ReferenceContainer<ReferenceType>> i = null;
try {
i = new ReferenceIterator<ReferenceType>(f, factory, payloadrow);
} catch (IOException e) {
Log.logSevere("ArrayStack", "cannot rewrite because input file cannot be read, f = " + f.toString() + ": " + e.getMessage(), e);
return null;
}
if (!i.hasNext()) {
FileUtils.deletedelete(f);
return null;
}
assert i.hasNext();
File tmpFile = new File(newFile.getParentFile(), newFile.getName() + ".prt");
try {
HeapWriter writer = new HeapWriter(tmpFile, newFile, keylength, order, writeBuffer);
rewrite(i, order, writer);
writer.close(true);
} catch (IOException e) {
Log.logSevere("ArrayStack", "cannot writing or close writing rewrite, newFile = " + newFile.toString() + ", tmpFile = " + tmpFile.toString() + ": " + e.getMessage(), e);
FileUtils.deletedelete(tmpFile);
FileUtils.deletedelete(newFile);
return null;
} catch (RowSpaceExceededException e) {
Log.logSevere("ArrayStack", "cannot rewrite because of memory failure: " + e.getMessage(), e);
FileUtils.deletedelete(tmpFile);
FileUtils.deletedelete(newFile);
return null;
}
// we don't need the old file any more
FileUtils.deletedelete(f);
return newFile;
}
private static <ReferenceType extends Reference> void merge(
CloneableIterator<ReferenceContainer<ReferenceType>> i1,
CloneableIterator<ReferenceContainer<ReferenceType>> i2,
@@ -879,6 +954,26 @@ public class ArrayStack implements BLOB {
}
// finished with writing
}
private static <ReferenceType extends Reference> void rewrite(
CloneableIterator<ReferenceContainer<ReferenceType>> i,
ByteOrder ordering, HeapWriter writer) throws IOException, RowSpaceExceededException {
assert i.hasNext();
ReferenceContainer<ReferenceType> c, co;
c = i.next();
while (true) {
assert c != null;
writer.add(c.getTermHash(), c.exportCollection());
if (i.hasNext()) {
co = c;
c = i.next();
assert ordering.compare(c.getTermHash(), co.getTermHash()) > 0;
continue;
}
break;
}
// finished with writing
}
public static void main(final String[] args) {

@@ -109,7 +109,11 @@ public class IODispatcher extends Thread {
public synchronized void merge(File f1, File f2, ReferenceFactory<? extends Reference> factory, ArrayStack array, Row payloadrow, File newFile) {
if (mergeQueue == null || controlQueue == null || !this.isAlive()) {
Log.logWarning("IODispatcher", "emergency merge of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
if (f2 == null) {
Log.logWarning("IODispatcher", "emergency rewrite of file " + f1.getName() + " to " + newFile.getName());
} else {
Log.logWarning("IODispatcher", "emergency merge of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
}
array.mergeMount(f1, f2, factory, payloadrow, newFile, (int) Math.min(MemoryControl.available() / 3, writeBufferSize));
} else {
MergeJob job = new MergeJob(f1, f2, factory, array, payloadrow, newFile);
@@ -117,10 +121,18 @@ public class IODispatcher extends Thread {
if (this.isAlive()) {
this.mergeQueue.put(job);
this.controlQueue.release();
Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
if (f2 == null) {
Log.logInfo("IODispatcher", "appended rewrite job of file " + f1.getName() + " to " + newFile.getName());
} else {
Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
}
} else {
job.merge();
Log.logWarning("IODispatcher", "dispatcher not running, merged files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
if (f2 == null) {
Log.logWarning("IODispatcher", "dispatcher not running, merged files " + f1.getName() + " to " + newFile.getName());
} else {
Log.logWarning("IODispatcher", "dispatcher not running, rewrote file " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
}
}
} catch (InterruptedException e) {
Log.logWarning("IODispatcher", "interrupted: " + e.getMessage(), e);
@@ -166,7 +178,11 @@ public class IODispatcher extends Thread {
Log.logSevere("IODispatcher", "main run job was interrupted (2)", e);
Log.logException(e);
} catch (Exception e) {
Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input files are " + f1 + " and " + f2, e);
if (f2 == null) {
Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input file is " + f1, e);
} else {
Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input files are " + f1 + " and " + f2, e);
}
Log.logException(e);
}
continue loop;
@@ -243,7 +259,7 @@
Log.logWarning("IODispatcher", "merge of file (1) " + f1.getName() + " failed: file does not exists");
return null;
}
if (!f2.exists()) {
if (f2 != null && !f2.exists()) {
Log.logWarning("IODispatcher", "merge of file (2) " + f2.getName() + " failed: file does not exists");
return null;
}

@@ -310,6 +310,15 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
donesomething = true;
}
// rewrite very old files into smaller ones (based on the merge hack from sixcooler, see http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004)
while (this.merger.queueLength() < 1) {
File ff = this.array.unmountOldest();
if (ff == null) break;
Log.logInfo("RICELL-shrink4/rewrite", "unmountOldest()");
merger.merge(ff, null, this.factory, this.array, this.payloadrow, newContainerBLOBFile());
donesomething = true;
}
return donesomething;
}
