- some refactoring in BLOBHeap to enable more gap processing functions

- better gap merging in BLOBHeap
- shrinking of heap file if gap is at end of file when file is closed

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5268 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 9d50bfd0b3
commit 998861acfd

@ -29,8 +29,10 @@ package de.anomic.kelondro;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import de.anomic.server.serverMemory; import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
@ -38,19 +40,10 @@ import de.anomic.server.logging.serverLog;
public final class kelondroBLOBHeap implements kelondroBLOB { public final class kelondroBLOBHeap implements kelondroBLOB {
private kelondroBytesLongMap index; // key/seek relation for used records private kelondroBytesLongMap index; // key/seek relation for used records
private ArrayList<gap> free; // list of {size, seek} pairs denoting space and position of free records private TreeMap<Long, Integer> free; // list of {size, seek} pairs denoting space and position of free records
private final File heapFile; // the file of the heap private final File heapFile; // the file of the heap
private final kelondroByteOrder ordering; // the ordering on keys private final kelondroByteOrder ordering; // the ordering on keys
private RandomAccessFile file; // a random access to the file private RandomAccessFile file; // a random access to the file
public static class gap {
public long seek;
public int size;
public gap(final long seek, final int size) {
this.seek = seek;
this.size = size;
}
}
/* /*
* This class implements a BLOB management based on a sequence of records in a random access file * This class implements a BLOB management based on a sequence of records in a random access file
@ -87,7 +80,7 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
this.heapFile = heapFile; this.heapFile = heapFile;
this.index = new kelondroBytesLongMap(keylength, this.ordering, 0); this.index = new kelondroBytesLongMap(keylength, this.ordering, 0);
this.free = new ArrayList<gap>(); this.free = new TreeMap<Long, Integer>();
this.file = new RandomAccessFile(heapFile, "rw"); this.file = new RandomAccessFile(heapFile, "rw");
final byte[] key = new byte[keylength]; final byte[] key = new byte[keylength];
int reclen; int reclen;
@ -119,7 +112,7 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
// check if this record is empty // check if this record is empty
if (key == null || key[0] == 0) { if (key == null || key[0] == 0) {
// it is an empty record, store to free list // it is an empty record, store to free list
if (reclen > 0) free.add(new gap(seek, reclen)); if (reclen > 0) free.put(seek, reclen);
} else { } else {
// store key and access address of entry in index // store key and access address of entry in index
try { try {
@ -140,19 +133,19 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
// try to merge free entries // try to merge free entries
if (this.free.size() > 1) { if (this.free.size() > 1) {
int merged = 0; int merged = 0;
gap lastFree, nextFree; Map.Entry<Long, Integer> lastFree, nextFree;
final Iterator<gap> i = this.free.iterator(); final Iterator<Map.Entry<Long, Integer>> i = this.free.entrySet().iterator();
lastFree = i.next(); lastFree = i.next();
while (i.hasNext()) { while (i.hasNext()) {
nextFree = i.next(); nextFree = i.next();
//System.out.println("*** DEBUG BLOB: free-seek = " + nextFree.seek + ", size = " + nextFree.size); //System.out.println("*** DEBUG BLOB: free-seek = " + nextFree.seek + ", size = " + nextFree.size);
// check if they follow directly // check if they follow directly
if (lastFree.seek + lastFree.size + 4 == nextFree.seek) { if (lastFree.getKey() + lastFree.getValue() + 4 == nextFree.getKey()) {
// merge those records // merge those records
file.seek(lastFree.seek); file.seek(lastFree.getKey());
lastFree.size = lastFree.size + nextFree.size + 4; // this updates also the free array lastFree.setValue(lastFree.getValue() + nextFree.getValue() + 4); // this updates also the free map
file.writeInt(lastFree.size); file.writeInt(lastFree.getValue());
file.seek(nextFree.seek); file.seek(nextFree.getKey());
file.write(0);file.write(0);file.write(0);file.write(0); file.write(0);file.write(0);file.write(0);file.write(0);
i.remove(); i.remove();
merged++; merged++;
@ -285,8 +278,10 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
/** /**
* close the BLOB table * close the BLOB table
* @throws
*/ */
public synchronized void close() { public synchronized void close() {
shrinkWithGapsAtEnd();
index.close(); index.close();
free.clear(); free.clear();
try { try {
@ -328,34 +323,34 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
long lseek = -1; long lseek = -1;
int lsize = 0; int lsize = 0;
final int reclen = b.length + index.row().primaryKeyLength; final int reclen = b.length + index.row().primaryKeyLength;
gap entry; Map.Entry<Long, Integer> entry;
Iterator<gap> i = this.free.iterator(); Iterator<Map.Entry<Long, Integer>> i = this.free.entrySet().iterator();
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
if (entry.size == reclen) { if (entry.getValue().intValue() == reclen) {
// we found an entry that has exactly the size that we need! // we found an entry that has exactly the size that we need!
// we use that entry and stop looking for a larger entry // we use that entry and stop looking for a larger entry
file.seek(entry.seek); file.seek(entry.getKey());
final int reclenf = file.readInt(); final int reclenf = file.readInt();
assert reclenf == reclen; assert reclenf == reclen;
file.write(key); file.write(key);
file.write(b); file.write(b);
// add the entry to the index
this.index.putl(key, entry.getKey());
// remove the entry from the free list // remove the entry from the free list
i.remove(); i.remove();
// add the entry to the index //System.out.println("*** DEBUG BLOB: replaced-fit record at " + entry.seek + ", reclen=" + reclen + ", key=" + new String(key));
this.index.putl(key, entry.seek);
//System.out.println("*** DEBUG BLOB: replaced-fit record at " + entry.seek + ", reclen=" + reclen + ", key=" + new String(key));
// finished! // finished!
return; return;
} }
// look for the biggest size // look for the biggest size
if (entry.size > lsize) { if (entry.getValue() > lsize) {
lseek = entry.seek; lseek = entry.getKey();
lsize = entry.size; lsize = entry.getValue();
} }
} }
@ -381,18 +376,10 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
file.writeInt(newfreereclen); file.writeInt(newfreereclen);
// remove the old free entry // remove the old free entry
i = this.free.iterator(); this.free.remove(lseek);
while (i.hasNext()) {
entry = i.next();
if (entry.size == (long) lsize && entry.seek == lseek) {
// remove the entry from the free list
i.remove();
break;
}
}
// add a new free entry // add a new free entry
free.add(new gap(lseek + 4 + reclen, newfreereclen)); this.free.put(lseek + 4 + reclen, newfreereclen);
//System.out.println("*** DEBUG BLOB: replaced-split record at " + lseek + ", reclen=" + reclen + ", new reclen=" + newfreereclen + ", key=" + new String(key)); //System.out.println("*** DEBUG BLOB: replaced-split record at " + lseek + ", reclen=" + reclen + ", new reclen=" + newfreereclen + ", key=" + new String(key));
@ -414,84 +401,127 @@ public final class kelondroBLOBHeap implements kelondroBLOB {
assert index.row().primaryKeyLength == key.length; assert index.row().primaryKeyLength == key.length;
// check if the index contains the key // check if the index contains the key
final long pos = index.getl(key); final long seek = index.getl(key);
if (pos < 0) return; if (seek < 0) return;
// access the file and read the container // access the file and read the container
this.file.seek(pos); this.file.seek(seek);
int len = file.readInt(); int size = file.readInt();
//assert seek + size + 4 <= this.file.length() : heapFile.getName() + ": too long size " + size + " in record at " + seek;
long filelength = this.file.length(); // put in separate variable for debugging
if (seek + size + 4 > filelength) {
serverLog.logSevere("BLOBHeap", heapFile.getName() + ": too long size " + size + " in record at " + seek);
throw new IOException(heapFile.getName() + ": too long size " + size + " in record at " + seek);
}
// add entry to free array // add entry to free array
gap thisGap = new gap(pos, len); this.free.put(seek, size);
this.free.add(thisGap);
// fill zeros to the content // fill zeros to the content
while (len-- > 0) this.file.write(0); int l = size; while (l-- > 0) this.file.write(0);
// remove entry from index // remove entry from index
this.index.removel(key); this.index.removel(key);
// recursively merge gaps // recursively merge gaps
tryMergeGaps(thisGap); tryMergeNextGaps(seek, size);
tryMergePreviousGap(seek);
}
private void tryMergePreviousGap(final long thisSeek) throws IOException {
// this is called after a record has been removed. That may cause that a new
// empty record was surrounded by gaps. We merge with a previous gap, if this
// is also empty, but don't do that recursively
// If this is successful, it removes the given marker for thisSeed and
// because of this, this method MUST be called AFTER tryMergeNextGaps was called.
// first find the gap entry for the closest gap in front of the give gap
SortedMap<Long, Integer> head = this.free.headMap(thisSeek);
if (head.size() == 0) return;
long previousSeek = head.lastKey().longValue();
int previousSize = head.get(previousSeek).intValue();
// check if this is directly in front
if (previousSeek + previousSize + 4 == thisSeek) {
// right in front! merge the gaps
Integer thisSize = this.free.get(thisSeek);
assert thisSize != null;
mergeGaps(previousSeek, previousSize, thisSeek, thisSize.intValue());
}
} }
private void tryMergeGaps(final gap thisGap) throws IOException { private void tryMergeNextGaps(final long thisSeek, final int thisSize) throws IOException {
// try to merge two gaps if one gap has been processed already and the position of the next record is known // try to merge two gaps if one gap has been processed already and the position of the next record is known
// if the next record is also a gap, merge these gaps and go on recursively // if the next record is also a gap, merge these gaps and go on recursively
// first check if next gap position is outside of file size // first check if next gap position is outside of file size
long nextRecord = thisGap.seek + thisGap.size + 4; long nextSeek = thisSeek + thisSize + 4;
if (nextRecord >= this.file.length()) return; // end of recursion if (nextSeek >= this.file.length()) return; // end of recursion
// move to next position and read record size // move to next position and read record size
this.file.seek(nextRecord); Integer nextSize = this.free.get(nextSeek);
int len = file.readInt(); if (nextSize == null) return; // finished, this is not a gap
// check if the record is a gap-record // check if the record is a gap-record
assert len > 0; assert nextSize.intValue() > 0;
if (len == 0) { if (nextSize.intValue() == 0) {
// a strange gap record: we can extend the thisGap with four bytes // a strange gap record: we can extend the thisGap with four bytes
// the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap
mergeGaps(thisSeek, thisSize, nextSeek, 0);
// recursively go on
tryMergeNextGaps(thisSeek, thisSize + 4);
} else { } else {
// check if this is a true gap!
this.file.seek(nextSeek + 4);
int t = this.file.read(); int t = this.file.read();
assert t == 0;
if (t == 0) { if (t == 0) {
System.out.println("*** DEBUG-BLOBHeap " + heapFile.getName() + ": merging gap from pos " + thisGap.seek + ", len " + thisGap.size + " with next record of size " + len + " (+ 4)");
// the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap // the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap
gap g = removeFromFree(nextRecord, len); mergeGaps(thisSeek, thisSize, nextSeek, nextSize.intValue());
assert g != null;
// remove also the current gap, we will write a new gap entry
removeFromFree(thisGap.seek, thisGap.size);
// overwrite the size bytes
this.file.seek(nextRecord);
this.file.write(0);this.file.write(0);this.file.write(0);this.file.write(0);
// the new size of the current gap: old size + len + 4
int newLen = thisGap.size + 4 + len;
this.file.seek(thisGap.seek);
this.file.writeInt(newLen);
// register new gap in the free array
gap newGap = new gap(thisGap.seek, newLen);
this.free.add(newGap);
// recursively go on // recursively go on
tryMergeGaps(newGap); tryMergeNextGaps(thisSeek, thisSize + 4 + nextSize.intValue());
} }
} }
} }
private gap removeFromFree(long pos, int len) { private void mergeGaps(final long seek0, final int size0, final long seek1, final int size1) throws IOException {
Iterator<gap> i = this.free.iterator(); System.out.println("*** DEBUG-BLOBHeap " + heapFile.getName() + ": merging gap from pos " + seek0 + ", len " + size0 + " with next record of size " + size1 + " (+ 4)");
gap g;
while (i.hasNext()) { Integer g = this.free.remove(seek1); // g is only used for debugging
g = i.next(); assert g != null;
if (g.seek == pos && g.size == len) { assert g.intValue() == size1;
i.remove();
return g; // overwrite the size bytes of next records with zeros
this.file.seek(seek1);
this.file.write(0);this.file.write(0);this.file.write(0);this.file.write(0);
// the new size of the current gap: old size + len + 4
int newSize = size0 + 4 + size1;
this.file.seek(seek0);
this.file.writeInt(newSize);
// register new gap in the free array; overwrite old gap entry
g = this.free.put(seek0, newSize);
assert g != null;
assert g.intValue() == size0;
}
private void shrinkWithGapsAtEnd() {
// find gaps at the end of the file and shrink the file by these gaps
try {
while (this.free.size() > 0) {
Long seek = this.free.lastKey();
int size = this.free.get(seek).intValue();
if (seek.longValue() + size + 4 != this.file.length()) return;
// shrink the file
this.file.setLength(seek.longValue());
this.free.remove(seek);
} }
} catch (IOException e) {
// do nothing
} }
return null;
} }
/** /**

Loading…
Cancel
Save