diff --git a/source/net/yacy/kelondro/blob/ArrayStack.java b/source/net/yacy/kelondro/blob/ArrayStack.java
index 3563acb57..8b0d30e19 100755
--- a/source/net/yacy/kelondro/blob/ArrayStack.java
+++ b/source/net/yacy/kelondro/blob/ArrayStack.java
@@ -5,8 +5,8 @@
 // This is a part of YaCy, a peer-to-peer based web search engine
 //
 // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
+// $LastChangedRevision$
+// $LastChangedBy$
 //
 // LICENSE
 //
@@ -270,6 +270,14 @@ public class ArrayStack implements BLOB {
         unmountBLOB(bestMatch[0], false);
         return bestMatch;
     }
+
+    public synchronized File unmountOldest() {
+        if (this.blobs.size() == 0) return null;
+        if (System.currentTimeMillis() - this.blobs.get(0).creation.getTime() < this.fileAgeLimit) return null;
+        File f = this.blobs.get(0).location;
+        unmountBLOB(f, false);
+        return f;
+    }
 
     public synchronized File[] unmountSmallest(long maxResultSize) {
         if (this.blobs.size() < 2) return null;
@@ -731,23 +739,52 @@ public class ArrayStack implements BLOB {
         blobs = null;
     }
 
+    /**
+     * merge two blob files into one. If the second file is given as null,
+     * then the first file is only rewritten into a new one.
+     * @param f1 the first input file; when f2 is null this is the file that is rewritten
+     * @param f2 the second input file (may also be null, which selects the rewrite path)
+     * @param factory reference factory handed to the worker that reads the input file(s)
+     * @param payloadrow row definition handed to the worker that reads the input file(s)
+     * @param newFile the file that the result is written to
+     * @param writeBuffer write buffer size handed on to the worker
+     * @return the mounted target file, or null if the worker failed or the result could not be mounted
+     */
     public File mergeMount(File f1, File f2,
             ReferenceFactory factory,
             Row payloadrow, File newFile, int writeBuffer) {
-        Log.logInfo("BLOBArray", "merging " + f1.getName() + " with " + f2.getName());
-        File resultFile = mergeWorker(factory, this.keylength, this.ordering, f1, f2, payloadrow, newFile, writeBuffer);
-        if (resultFile == null) {
-            Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " returned null. newFile = " + newFile);
-            return null;
-        }
-        try {
-            mountBLOB(resultFile, false);
-        } catch (IOException e) {
-            Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " successfull, but read failed. resultFile = " + resultFile);
-            return null;
+        if (f2 == null) {
+            // this is a rewrite
+            Log.logInfo("BLOBArray", "rewrite of " + f1.getName());
+            File resultFile = rewriteWorker(factory, this.keylength, this.ordering, f1, payloadrow, newFile, writeBuffer);
+            if (resultFile == null) {
+                Log.logWarning("BLOBArray", "rewrite of file " + f1 + " returned null. newFile = " + newFile);
+                return null;
+            }
+            try {
+                mountBLOB(resultFile, false);
+            } catch (IOException e) {
+                Log.logWarning("BLOBArray", "rewrite of file " + f1 + " successfull, but read failed. resultFile = " + resultFile);
+                return null;
+            }
+            Log.logInfo("BLOBArray", "rewrite of " + f1.getName() + " into " + resultFile);
+            return resultFile;
+        } else {
+            Log.logInfo("BLOBArray", "merging " + f1.getName() + " with " + f2.getName());
+            File resultFile = mergeWorker(factory, this.keylength, this.ordering, f1, f2, payloadrow, newFile, writeBuffer);
+            if (resultFile == null) {
+                Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " returned null. newFile = " + newFile);
+                return null;
+            }
+            try {
+                mountBLOB(resultFile, false);
+            } catch (IOException e) {
+                Log.logWarning("BLOBArray", "merge of files " + f1 + ", " + f2 + " successfull, but read failed. resultFile = " + resultFile);
+                return null;
+            }
+            Log.logInfo("BLOBArray", "merged " + f1.getName() + " with " + f2.getName() + " into " + resultFile);
+            return resultFile;
         }
-        Log.logInfo("BLOBArray", "merged " + f1.getName() + " with " + f2.getName() + " into " + resultFile);
-        return resultFile;
     }
 
     private static File mergeWorker(
@@ -806,6 +843,44 @@ public class ArrayStack implements BLOB {
         return newFile;
     }
 
+    private static File rewriteWorker(
+            ReferenceFactory factory,
+            int keylength, ByteOrder order, File f, Row payloadrow, File newFile, int writeBuffer) {
+        // iterate the single input file and write its entries into a new one
+
+        CloneableIterator> i = null;
+        try {
+            i = new ReferenceIterator(f, factory, payloadrow);
+        } catch (IOException e) {
+            Log.logSevere("ArrayStack", "cannot rewrite because input file cannot be read, f = " + f.toString() + ": " + e.getMessage(), e);
+            return null;
+        }
+        if (!i.hasNext()) {
+            FileUtils.deletedelete(f);
+            return null;
+        }
+        assert i.hasNext();
+        File tmpFile = new File(newFile.getParentFile(), newFile.getName() + ".prt");
+        try {
+            HeapWriter writer = new HeapWriter(tmpFile, newFile, keylength, order, writeBuffer);
+            rewrite(i, order, writer);
+            writer.close(true);
+        } catch (IOException e) {
+            Log.logSevere("ArrayStack", "cannot writing or close writing rewrite, newFile = " + newFile.toString() + ", tmpFile = " + tmpFile.toString() + ": " + e.getMessage(), e);
+            FileUtils.deletedelete(tmpFile);
+            FileUtils.deletedelete(newFile);
+            return null;
+        } catch (RowSpaceExceededException e) {
+            Log.logSevere("ArrayStack", "cannot rewrite because of memory failure: " + e.getMessage(), e);
+            FileUtils.deletedelete(tmpFile);
+            FileUtils.deletedelete(newFile);
+            return null;
+        }
+        // the rewrite succeeded, so the old input file is not needed any more
+        FileUtils.deletedelete(f);
+        return newFile;
+    }
+
     private static void merge(
             CloneableIterator> i1,
             CloneableIterator> i2,
@@ -879,6 +954,26 @@ public class ArrayStack implements BLOB {
         }
         // finished with writing
     }
+
+    private static void rewrite(
+            CloneableIterator> i,
+            ByteOrder ordering, HeapWriter writer) throws IOException, RowSpaceExceededException {
+        assert i.hasNext();
+        ReferenceContainer c, co;
+        c = i.next();
+        while (true) {
+            assert c != null;
+            writer.add(c.getTermHash(), c.exportCollection());
+            if (i.hasNext()) {
+                co = c;
+                c = i.next();
+                assert ordering.compare(c.getTermHash(), co.getTermHash()) > 0;
+                continue;
+            }
+            break;
+        }
+        // finished with writing
+    }
 
 
     public static void main(final String[] args) {
diff --git a/source/net/yacy/kelondro/rwi/IODispatcher.java b/source/net/yacy/kelondro/rwi/IODispatcher.java
index 891968698..7054725c5 100644
--- a/source/net/yacy/kelondro/rwi/IODispatcher.java
+++ b/source/net/yacy/kelondro/rwi/IODispatcher.java
@@ -109,7 +109,11 @@ public class IODispatcher extends Thread {
 
     public synchronized void merge(File f1, File f2, ReferenceFactory factory, ArrayStack array, Row payloadrow, File newFile) {
         if (mergeQueue == null || controlQueue == null || !this.isAlive()) {
-            Log.logWarning("IODispatcher", "emergency merge of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+            if (f2 == null) {
+                Log.logWarning("IODispatcher", "emergency rewrite of file " + f1.getName() + " to " + newFile.getName());
+            } else {
+                Log.logWarning("IODispatcher", "emergency merge of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+            }
             array.mergeMount(f1, f2, factory, payloadrow, newFile, (int) Math.min(MemoryControl.available() / 3, writeBufferSize));
         } else {
             MergeJob job = new MergeJob(f1, f2, factory, array, payloadrow, newFile);
@@ -117,10 +121,18 @@ public class IODispatcher extends Thread {
                 if (this.isAlive()) {
                     this.mergeQueue.put(job);
                     this.controlQueue.release();
-                    Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                    if (f2 == null) {
+                        Log.logInfo("IODispatcher", "appended rewrite job of file " + f1.getName() + " to " + newFile.getName());
+                    } else {
+                        Log.logInfo("IODispatcher", "appended merge job of files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                    }
                 } else {
                     job.merge();
-                    Log.logWarning("IODispatcher", "dispatcher not running, merged files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                    if (f2 == null) {
+                        Log.logWarning("IODispatcher", "dispatcher not running, rewrote file " + f1.getName() + " to " + newFile.getName());
+                    } else {
+                        Log.logWarning("IODispatcher", "dispatcher not running, merged files " + f1.getName() + ", " + f2.getName() + " to " + newFile.getName());
+                    }
                 }
             } catch (InterruptedException e) {
                 Log.logWarning("IODispatcher", "interrupted: " + e.getMessage(), e);
@@ -166,7 +178,11 @@ public class IODispatcher extends Thread {
                         Log.logSevere("IODispatcher", "main run job was interrupted (2)", e);
                         Log.logException(e);
                     } catch (Exception e) {
-                        Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input files are " + f1 + " and " + f2, e);
+                        if (f2 == null) {
+                            Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input file is " + f1, e);
+                        } else {
+                            Log.logSevere("IODispatcher", "main run job had errors (2), dump to " + f + " failed. Input files are " + f1 + " and " + f2, e);
+                        }
                         Log.logException(e);
                     }
                     continue loop;
@@ -243,7 +259,7 @@ public class IODispatcher extends Thread {
                 Log.logWarning("IODispatcher", "merge of file (1) " + f1.getName() + " failed: file does not exists");
                 return null;
             }
-            if (!f2.exists()) {
+            if (f2 != null && !f2.exists()) {
                 Log.logWarning("IODispatcher", "merge of file (2) " + f2.getName() + " failed: file does not exists");
                 return null;
             }
diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java
index 503766696..45b3a0292 100644
--- a/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java
+++ b/source/net/yacy/kelondro/rwi/ReferenceContainerArray.java
@@ -310,6 +310,15 @@ public final class ReferenceContainerArray {
             donesomething = true;
         }
 
+        // rewrite very old files into new ones, one at a time while the merger queue is empty (hack from sixcooler, see http://forum.yacy-websuche.de/viewtopic.php?p=15004#p15004)
+        while (this.merger.queueLength() < 1) {
+            File ff = this.array.unmountOldest();
+            if (ff == null) break;
+            Log.logInfo("RICELL-shrink4/rewrite", "unmountOldest()");
+            merger.merge(ff, null, this.factory, this.array, this.payloadrow, newContainerBLOBFile());
+            donesomething = true;
+        }
+
         return donesomething;
     }
 