diff --git a/source/net/yacy/kelondro/blob/Heap.java b/source/net/yacy/kelondro/blob/Heap.java index c8c878db0..47e73b5eb 100755 --- a/source/net/yacy/kelondro/blob/Heap.java +++ b/source/net/yacy/kelondro/blob/Heap.java @@ -166,17 +166,19 @@ public final class Heap extends HeapModifier implements BLOB { public void flushBuffer() throws IOException { assert buffer != null; if (buffer == null) return; + // check size of buffer Iterator> i = this.buffer.entrySet().iterator(); int l = 0; while (i.hasNext()) l += i.next().getValue().length; assert l == this.buffersize; - // simulate write: this whole code block is only here to test the assert at the end of the block; remove after testing - i = this.buffer.entrySet().iterator(); int posBuffer = 0; Map.Entry entry; byte[] key, blob; + // simulate write: this whole code block is only here to test the assert at the end of the block; remove after testing + /* + i = this.buffer.entrySet().iterator(); while (i.hasNext()) { entry = i.next(); key = normalizeKey(entry.getKey()); @@ -184,6 +186,11 @@ public final class Heap extends HeapModifier implements BLOB { posBuffer += 4 + this.keylength + blob.length; } assert l + (4 + this.keylength) * this.buffer.size() == posBuffer : "l = " + l + ", this.keylength = " + this.keylength + ", this.buffer.size() = " + this.buffer.size() + ", posBuffer = " + posBuffer; + */ + + synchronized (this) { + super.deleteFingerprint(); + } // append all contents of the buffer into one byte[] i = this.buffer.entrySet().iterator(); @@ -461,6 +468,8 @@ public final class Heap extends HeapModifier implements BLOB { key = normalizeKey(key); synchronized (this) { + super.deleteFingerprint(); + // check the buffer assert buffer != null; if (buffer != null) { diff --git a/source/net/yacy/kelondro/blob/HeapModifier.java b/source/net/yacy/kelondro/blob/HeapModifier.java index 5090aca47..64f4cce0d 100644 --- a/source/net/yacy/kelondro/blob/HeapModifier.java +++ b/source/net/yacy/kelondro/blob/HeapModifier.java 
@@ -67,6 +67,7 @@ public class HeapModifier extends HeapReader implements BLOB { this.file.close(); this.file = null; FileUtils.deletedelete(this.heapFile); + super.deleteFingerprint(); this.file = new CachedFileWriter(heapFile); } @@ -118,6 +119,7 @@ public class HeapModifier extends HeapReader implements BLOB { Log.logSevere("BLOBHeap", heapFile.getName() + ": too long size " + size + " in record at " + seek); throw new IOException(heapFile.getName() + ": too long size " + size + " in record at " + seek); } + super.deleteFingerprint(); // add entry to free array this.free.put(seek, size); @@ -267,6 +269,7 @@ public class HeapModifier extends HeapReader implements BLOB { if (MemoryControl.available() < len) { if (!MemoryControl.request(len, true)) return 0; // not enough memory available for this blob } + super.deleteFingerprint(); // read the key final byte[] keyf = new byte[this.keylength]; diff --git a/source/net/yacy/kelondro/blob/HeapReader.java b/source/net/yacy/kelondro/blob/HeapReader.java index eda2e659e..3bd744a3c 100644 --- a/source/net/yacy/kelondro/blob/HeapReader.java +++ b/source/net/yacy/kelondro/blob/HeapReader.java @@ -1,4 +1,4 @@ -// kelondroBLOBHeapReader.java +// HeapReader.java // (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 30.12.2008 on http://yacy.net // @@ -63,6 +63,7 @@ public class HeapReader { protected Writer file; // a random access to the file protected HandleMap index; // key/seek relation for used records protected Gap free; // set of {seek, size} pairs denoting space and position of free records + private File fingerprintFileIdx, fingerprintFileGap; // files with dumped indexes. 
Will be deleted if file is written public HeapReader( final File heapFile, @@ -77,6 +78,8 @@ public class HeapReader { this.file = new CachedFileWriter(this.heapFile); // read or initialize the index + fingerprintFileIdx = null; + fingerprintFileGap = null; if (initIndexReadDump()) { // verify that everything worked just fine // pick some elements of the index @@ -153,19 +156,19 @@ public class HeapReader { Log.logSevere("HeapReader", "cannot generate a fingerprint for " + this.heapFile + ": null"); return false; } - File fif = fingerprintIndexFile(this.heapFile, fingerprint); - if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz"); - File fgf = fingerprintGapFile(this.heapFile, fingerprint); - if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz"); - if (!fif.exists() || !fgf.exists()) { - deleteAllFingerprints(this.heapFile, fif.getName(), fgf.getName()); + this.fingerprintFileIdx = fingerprintIndexFile(this.heapFile, fingerprint); + if (!this.fingerprintFileIdx.exists()) this.fingerprintFileIdx = new File(this.fingerprintFileIdx.getAbsolutePath() + ".gz"); + this.fingerprintFileGap = fingerprintGapFile(this.heapFile, fingerprint); + if (!this.fingerprintFileGap.exists()) this.fingerprintFileGap = new File(this.fingerprintFileGap.getAbsolutePath() + ".gz"); + if (!this.fingerprintFileIdx.exists() || !this.fingerprintFileGap.exists()) { + deleteAllFingerprints(this.heapFile, this.fingerprintFileIdx.getName(), this.fingerprintFileGap.getName()); return false; } // there is an index and a gap file: // read the index file: try { - this.index = new HandleMap(this.keylength, this.ordering, 8, fif); + this.index = new HandleMap(this.keylength, this.ordering, 8, this.fingerprintFileIdx); } catch (IOException e) { Log.logException(e); return false; @@ -176,20 +179,15 @@ public class HeapReader { // check saturation int[] saturation = this.index.saturation(); - Log.logInfo("HeapReader", "saturation of " + fif.getName() + ": keylength = " + 
saturation[0] + ", vallength = " + saturation[1] + ", possible saving: " + ((this.keylength - saturation[0] + 8 - saturation[1]) * index.size() / 1024 / 1024) + " MB"); - - // an index file is a one-time throw-away object, so just delete it now - FileUtils.deletedelete(fif); + Log.logInfo("HeapReader", "saturation of " + this.fingerprintFileIdx.getName() + ": keylength = " + saturation[0] + ", vallength = " + saturation[1] + ", possible saving: " + ((this.keylength - saturation[0] + 8 - saturation[1]) * index.size() / 1024 / 1024) + " MB"); // read the gap file: try { - this.free = new Gap(fgf); + this.free = new Gap(this.fingerprintFileGap); } catch (IOException e) { Log.logException(e); return false; } - // same with gap file - FileUtils.deletedelete(fgf); // everything is fine now return !this.index.isEmpty(); @@ -205,6 +203,23 @@ public class HeapReader { return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".gap"); } + /** + * Deletes the fingerprint dump files (index and gap) remembered by this reader and forgets them. + * This should happen whenever the heap is written or entries are deleted. Stale dump files would usually + * be ignored anyway because their fingerprint hash no longer fits the heap file hash. However, the hash is + * computed from just some key bytes and not from all the data, so it may stay unchanged after a modification. 
+ */ + public void deleteFingerprint() { + if (this.fingerprintFileIdx != null) { + FileUtils.deletedelete(this.fingerprintFileIdx); + this.fingerprintFileIdx = null; + } + if (this.fingerprintFileGap != null) { + FileUtils.deletedelete(this.fingerprintFileGap); + this.fingerprintFileGap = null; + } + } + protected static String fingerprintFileHash(File f) { assert f != null; assert f.exists() : "file = " + f.toString(); @@ -214,7 +229,7 @@ public class HeapReader { return fp.substring(0, 12); } - public static void deleteAllFingerprints(File f, String exception1, String exception2) { + private static void deleteAllFingerprints(File f, String exception1, String exception2) { File d = f.getParentFile(); String n = f.getName(); String[] l = d.list(); @@ -250,7 +265,7 @@ public class HeapReader { //assert reclen > 0 : " reclen == 0 at seek pos " + seek; if (reclen == 0) { // very bad file inconsistency - Log.logSevere("kelondroBLOBHeap", "reclen == 0 at seek pos " + seek + " in file " + heapFile); + Log.logSevere("HeapReader", "reclen == 0 at seek pos " + seek + " in file " + heapFile); this.file.setLength(seek); // delete everything else at the remaining of the file :-( break loop; } @@ -277,7 +292,7 @@ public class HeapReader { file.seek(seek + 4); Arrays.fill(key, (byte) 0); file.write(key); // mark the place as empty record - Log.logWarning("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": skiped not wellformed key " + new String(key) + " at seek pos " + seek); + Log.logWarning("HeapReader", "BLOB " + heapFile.getName() + ": skiped not wellformed key " + new String(key) + " at seek pos " + seek); } } // new seek position @@ -321,7 +336,8 @@ public class HeapReader { lastFree = nextFree; } } - Log.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.toString() + ": merged " + merged + " free records"); + Log.logInfo("HeapReader", "BLOB " + heapFile.toString() + ": merged " + merged + " free records"); + if (merged > 0) deleteFingerprint(); } } @@ -465,7 +481,7 @@ 
public class HeapReader { file.readFully(keyf, 0, keyf.length); if (!this.ordering.equal(key, keyf)) { // verification of the indexed access failed. we must re-read the index - Log.logSevere("kelondroBLOBHeap", "indexed verification access failed for " + heapFile.toString()); + Log.logSevere("HeapReader", "indexed verification access failed for " + heapFile.toString()); // this is a severe operation, it should never happen. // remove entry from index because keeping that element in the index would not make sense index.remove(key); @@ -548,18 +564,34 @@ public class HeapReader { // now we can create a dump of the index and the gap information // to speed up the next start try { - long start = System.currentTimeMillis(); String fingerprint = fingerprintFileHash(this.heapFile); if (fingerprint == null) { - Log.logSevere("kelondroBLOBHeap", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null"); + Log.logSevere("HeapReader", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null"); } else { - free.dump(fingerprintGapFile(this.heapFile, fingerprint)); + File newFingerprintFileGap = fingerprintGapFile(this.heapFile, fingerprint); + if (this.fingerprintFileGap != null && + this.fingerprintFileGap.getName().equals(newFingerprintFileGap.getName()) && + this.fingerprintFileGap.exists()) { + Log.logInfo("HeapReader", "using existing gap dump instead of writing a new one: " + this.fingerprintFileGap.getName()); + } else { + long start = System.currentTimeMillis(); + free.dump(newFingerprintFileGap); + Log.logInfo("HeapReader", "wrote a dump for the " + this.free.size() + " gap entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds."); + } } free.clear(); free = null; if (fingerprint != null) { - index.dump(fingerprintIndexFile(this.heapFile, fingerprint)); - Log.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + 
(System.currentTimeMillis() - start) + " milliseconds."); + File newFingerprintFileIdx = fingerprintIndexFile(this.heapFile, fingerprint); + if (this.fingerprintFileIdx != null && + this.fingerprintFileIdx.getName().equals(newFingerprintFileIdx.getName()) && + this.fingerprintFileIdx.exists()) { + Log.logInfo("HeapReader", "using existing idx dump instead of writing a new one: " + this.fingerprintFileIdx.getName()); + } else { + long start = System.currentTimeMillis(); + index.dump(newFingerprintFileIdx); + Log.logInfo("HeapReader", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds."); + } } index.close(); index = null; diff --git a/source/net/yacy/kelondro/util/FileUtils.java b/source/net/yacy/kelondro/util/FileUtils.java index 612ab3c33..64eb38f37 100644 --- a/source/net/yacy/kelondro/util/FileUtils.java +++ b/source/net/yacy/kelondro/util/FileUtils.java @@ -919,7 +919,7 @@ public final class FileUtils { * @param path */ public static void deletedelete(final File path) { - if (!path.exists()) return; + if (path == null || !path.exists()) return; // empty the directory first if (path.isDirectory()) {