From 0e471ba33b2fb140178adf0ce8bbcd8eb4ee127b Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 27 Aug 2009 11:03:21 +0000 Subject: [PATCH] - fixed a bug in fast digest computation - added an open-on-demand hack to heap files: when a heap file is opened for the first time, it is first scanned to get a key index and then it is closed again. This will free up file pointers in cases where a really large number of blob files are opened upon initialization of ArrayStack objects. This should also solve a problem reported in http://forum.yacy-websuche.de/viewtopic.php?p=17191#p17191 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6267 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/kelondro/blob/HeapModifier.java | 32 +-------------- .../de/anomic/kelondro/blob/HeapReader.java | 40 ++++++++++++++++++- .../kelondro/io/CachedRandomAccess.java | 28 ++++++++++--- source/de/anomic/kelondro/order/Digest.java | 4 +- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/source/de/anomic/kelondro/blob/HeapModifier.java b/source/de/anomic/kelondro/blob/HeapModifier.java index 4f5f812cc..8203f4b9a 100644 --- a/source/de/anomic/kelondro/blob/HeapModifier.java +++ b/source/de/anomic/kelondro/blob/HeapModifier.java @@ -26,8 +26,6 @@ package de.anomic.kelondro.blob; import java.io.File; import java.io.IOException; -import java.util.Iterator; -import java.util.Map; import java.util.SortedMap; import de.anomic.kelondro.io.CachedRandomAccess; @@ -55,37 +53,9 @@ public class HeapModifier extends HeapReader implements BLOB { */ public HeapModifier(final File heapFile, final int keylength, final ByteOrder ordering) throws IOException { super(heapFile, keylength, ordering); - mergeFreeEntries(); } - private void mergeFreeEntries() throws IOException { - - // try to merge free entries - if (super.free.size() > 1) { - int merged = 0; - Map.Entry lastFree, nextFree; - final Iterator> i = this.free.entrySet().iterator(); - lastFree = i.next(); - while (i.hasNext()) { - 
nextFree = i.next(); - //System.out.println("*** DEBUG BLOB: free-seek = " + nextFree.seek + ", size = " + nextFree.size); - // check if they follow directly - if (lastFree.getKey() + lastFree.getValue() + 4 == nextFree.getKey()) { - // merge those records - this.file.seek(lastFree.getKey()); - lastFree.setValue(lastFree.getValue() + nextFree.getValue() + 4); // this updates also the free map - this.file.writeInt(lastFree.getValue()); - this.file.seek(nextFree.getKey()); - this.file.writeInt(0); - i.remove(); - merged++; - } else { - lastFree = nextFree; - } - } - Log.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": merged " + merged + " free records"); - } - } + /** * clears the content of the database diff --git a/source/de/anomic/kelondro/blob/HeapReader.java b/source/de/anomic/kelondro/blob/HeapReader.java index f43bba6db..c36be83fc 100644 --- a/source/de/anomic/kelondro/blob/HeapReader.java +++ b/source/de/anomic/kelondro/blob/HeapReader.java @@ -63,7 +63,7 @@ public class HeapReader { this.keylength = keylength; this.index = null; // will be created as result of initialization process this.free = null; // will be initialized later depending on existing idx/gap file - this.file = new CachedRandomAccess(heapFile); + this.file = new CachedRandomAccess(this.heapFile); // read or initialize the index if (initIndexReadDump()) { @@ -94,6 +94,15 @@ public class HeapReader { // if we did not have a dump, create a new index initIndexReadFromHeap(); } + + // merge gaps that follow directly + mergeFreeEntries(); + + // after the initial initialization of the heap, we close the file again + // to make more room to file pointers which may run out if the number + // of file descriptors is too low and the number of files is too high + this.file.close(); + // the file will be opened again automatically when the next access to it comes. 
} private boolean initIndexReadDump() { @@ -205,6 +214,35 @@ public class HeapReader { } + private void mergeFreeEntries() throws IOException { + + // try to merge free entries + if (free.size() > 1) { + int merged = 0; + Map.Entry lastFree, nextFree; + final Iterator> i = this.free.entrySet().iterator(); + lastFree = i.next(); + while (i.hasNext()) { + nextFree = i.next(); + //System.out.println("*** DEBUG BLOB: free-seek = " + nextFree.seek + ", size = " + nextFree.size); + // check if they follow directly + if (lastFree.getKey() + lastFree.getValue() + 4 == nextFree.getKey()) { + // merge those records + this.file.seek(lastFree.getKey()); + lastFree.setValue(lastFree.getValue() + nextFree.getValue() + 4); // this updates also the free map + this.file.writeInt(lastFree.getValue()); + this.file.seek(nextFree.getKey()); + this.file.writeInt(0); + i.remove(); + merged++; + } else { + lastFree = nextFree; + } + } + Log.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": merged " + merged + " free records"); + } + } + public String name() { return this.heapFile.getName(); } diff --git a/source/de/anomic/kelondro/io/CachedRandomAccess.java b/source/de/anomic/kelondro/io/CachedRandomAccess.java index d287d665f..7d54aa7d1 100644 --- a/source/de/anomic/kelondro/io/CachedRandomAccess.java +++ b/source/de/anomic/kelondro/io/CachedRandomAccess.java @@ -38,26 +38,30 @@ public final class CachedRandomAccess extends AbstractRandomAccess implements Ra public CachedRandomAccess(final File file) throws IOException, FileNotFoundException { this.name = file.getName(); this.file = file; - RAFile = new RandomAccessFile(file, "rw"); - cache = new byte[8192]; - cachestart = 0; - cachelen = 0; + this.RAFile = new RandomAccessFile(this.file, "rw"); + this.cache = new byte[8192]; + this.cachestart = 0; + this.cachelen = 0; } public synchronized long length() throws IOException { + checkReopen(); return this.RAFile.length(); } public synchronized void setLength(long length) throws 
IOException { + checkReopen(); cachelen = 0; RAFile.setLength(length); } public synchronized long available() throws IOException { + checkReopen(); return this.length() - RAFile.getFilePointer(); } public synchronized final void readFully(final byte[] b, final int off, int len) throws IOException { + checkReopen(); long seek = RAFile.getFilePointer(); if (cache != null && cachestart <= seek && cachelen - seek + cachestart >= len) { // read from cache @@ -93,6 +97,7 @@ public final class CachedRandomAccess extends AbstractRandomAccess implements Ra } public synchronized void write(final byte[] b, final int off, final int len) throws IOException { + checkReopen(); //assert len > 0; // write to file if (this.cache.length > 512) { @@ -122,6 +127,7 @@ public final class CachedRandomAccess extends AbstractRandomAccess implements Ra } public synchronized void seek(final long pos) throws IOException { + checkReopen(); RAFile.seek(pos); } @@ -134,10 +140,22 @@ public final class CachedRandomAccess extends AbstractRandomAccess implements Ra } catch (IOException e) { e.printStackTrace(); } - this.file = null; this.cache = null; this.RAFile = null; } + + private void checkReopen() { + if (this.RAFile != null) return; + // re-open the file + try { + this.RAFile = new RandomAccessFile(this.file, "rw"); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + this.cache = new byte[8192]; + this.cachestart = 0; + this.cachelen = 0; + } protected void finalize() throws Throwable { this.close(); diff --git a/source/de/anomic/kelondro/order/Digest.java b/source/de/anomic/kelondro/order/Digest.java index b868dc8d6..9d2871058 100644 --- a/source/de/anomic/kelondro/order/Digest.java +++ b/source/de/anomic/kelondro/order/Digest.java @@ -305,10 +305,10 @@ public class Digest { byte[] a = new byte[mb]; try { raf.seek(0); - raf.readFully(a, 0, mb - 1); + raf.readFully(a, 0, mb); digest.update(a, 0, mb); raf.seek(fl - mb); - raf.readFully(a, 0, mb - 1); + raf.readFully(a, 0, mb); 
digest.update(a, 0, mb); digest.update(NaturalOrder.encodeLong(fl, 8), 0, 8); if (includeDate) digest.update(NaturalOrder.encodeLong(file.lastModified(), 8), 0, 8);