From 455a763d7cac57cc7ca5a9fbcc93a80211c589ee Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 28 Apr 2010 08:38:57 +0000 Subject: [PATCH] performance hacks git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6845 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../yacy/kelondro/index/RowCollection.java | 19 ++++--- source/net/yacy/kelondro/index/RowSet.java | 56 +++++++++++-------- .../net/yacy/kelondro/order/Base64Order.java | 56 ------------------- .../net/yacy/kelondro/rwi/IODispatcher.java | 6 +- source/net/yacy/kelondro/rwi/IndexCell.java | 2 +- .../kelondro/rwi/ReferenceContainerCache.java | 12 +++- 6 files changed, 61 insertions(+), 90 deletions(-) diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index a9acee52c..effe2b4da 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -324,7 +324,7 @@ public class RowCollection implements Iterable, Cloneable { if ((chunkcache == null) || (rowdef == null)) return null; // case may appear during shutdown if (index >= chunkcount) return null; if ((index + 1) * rowdef.objectsize > chunkcache.length) return null; // the whole chunk does not fit into the chunkcache - final byte[] b = new byte[this.rowdef.width(0)]; + final byte[] b = new byte[this.rowdef.primaryKeyLength]; System.arraycopy(chunkcache, index * rowdef.objectsize, b, 0, b.length); return b; } @@ -348,7 +348,10 @@ public class RowCollection implements Iterable, Cloneable { public synchronized final void set(final int index, final Row.Entry a) throws RowSpaceExceededException { assert (index >= 0) : "set: access with index " + index + " is below zero"; ensureSize(index + 1); - final boolean sameKey = match(a.bytes(), 0, a.cellwidth(0), index); + byte[] column = a.bytes(); + assert a.cellwidth(0) == this.rowdef.primaryKeyLength; + assert column.length >= this.rowdef.primaryKeyLength; + final boolean sameKey = match(column, 0, index); //if (sameKey) System.out.print("$"); a.writeToArray(chunkcache, index * rowdef.objectsize); if (index >= this.chunkcount) this.chunkcount = index + 1; @@ -1000,10 +1003,11 @@ public class RowCollection implements Iterable, Cloneable { return c; } - protected synchronized int compare(final byte[] a, final int astart, final int alength, final int chunknumber) { + protected synchronized int compare(final byte[] a, final int astart, final int chunknumber) { assert (chunknumber < chunkcount); - final int l = Math.min(this.rowdef.primaryKeyLength, Math.min(a.length - astart, alength)); - return rowdef.objectOrder.compare(a, astart, chunkcache, chunknumber * this.rowdef.objectsize, l); + assert a.length - astart >= this.rowdef.primaryKeyLength; + final int len = Math.min(a.length - astart, this.rowdef.primaryKeyLength); + return rowdef.objectOrder.compare(a, astart, chunkcache, chunknumber * this.rowdef.objectsize, len); } protected final boolean match(final int i, final int j) { @@ -1023,10 +1027,11 @@ public class RowCollection implements Iterable, Cloneable { return true; } - protected synchronized boolean match(final byte[] a, int astart, final int alength, final int chunknumber) { + protected synchronized boolean match(final byte[] a, int astart, final int chunknumber) { if (chunknumber >= chunkcount) return false; int p = chunknumber * this.rowdef.objectsize; - int len = Math.min(this.rowdef.primaryKeyLength, Math.min(alength, a.length - astart)); + assert a.length - astart >= this.rowdef.primaryKeyLength; + int len = Math.min(a.length - astart, this.rowdef.primaryKeyLength); while (len-- != 0) { if (a[astart++] != chunkcache[p++]) return false; } diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java index 4dca8e49a..20b38acae 100644 --- a/source/net/yacy/kelondro/index/RowSet.java +++ b/source/net/yacy/kelondro/index/RowSet.java @@ -101,12 +101,14 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable= 0; } public final synchronized Row.Entry get(final byte[] key) { - final int index = find(key, 0, key.length); + assert key.length == this.rowdef.primaryKeyLength; + final int index = find(key, 0); if (index < 0) return null; return get(index, true); } @@ -118,7 +120,8 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable collectionReSortLimit) { sort(); } - final int index = find(entry.bytes(), 0, super.rowdef.primaryKeyLength); + assert entry.bytes().length >= this.rowdef.primaryKeyLength; + final int index = find(entry.bytes(), 0); if (index < 0) { super.addUnique(entry); } else { @@ -137,7 +140,8 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable collectionReSortLimit) { sort(); } - index = find(entry.bytes(), 0, super.rowdef.primaryKeyLength); + assert entry.bytes().length >= this.rowdef.primaryKeyLength; + index = find(entry.bytes(), 0); if (index < 0) { super.addUnique(entry); } else { @@ -150,7 +154,8 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable= 0) { // the entry existed before final Row.Entry entry = get(index, false); // no clone necessary @@ -176,8 +181,9 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable collectionReSortLimit) { sort(); @@ -212,26 +219,27 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable= 0) return p; // then find in unsorted area - return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount); + return iterativeSearch(a, astart, this.sortBound, this.chunkcount); } - private final int iterativeSearch(final byte[] key, final int astart, final int alength, final int leftBorder, final int rightBound) { + private final int iterativeSearch(final byte[] key, final int astart, final int leftBorder, final int rightBound) { // returns the chunknumber for (int i = leftBorder; i < rightBound; i++) { - if (match(key, astart, alength, i)) return i; + assert key.length - astart >= this.rowdef.primaryKeyLength; + if (match(key, astart, i)) return i; } return -1; } - private final int binarySearch(final byte[] key, final int astart, final int alength) { + private final int binarySearch(final byte[] key, final int astart) { // returns the exact position of the key if the key exists, // or -1 if the key does not exist assert (rowdef.objectOrder != null); @@ -240,15 +248,16 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable> 1); - d = compare(key, astart, alength, p); + p = (l + rbound) >> 1; + assert key.length - astart >= this.rowdef.primaryKeyLength; + d = compare(key, astart, p); if (d == 0) return p; if (d < 0) rbound = p; else l = p + 1; } return -1; } - protected final int binaryPosition(final byte[] key, final int astart, final int alength) { + protected final int binaryPosition(final byte[] key, final int astart) { // returns the exact position of the key if the key exists, // or a position of an entry that is greater than the key if the // key does not exist @@ -258,8 +267,9 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable> 1); - d = compare(key, astart, alength, p); + p = (l + rbound) >> 1; + assert key.length - astart >= this.rowdef.primaryKeyLength; + d = compare(key, astart, p); if (d == 0) return p; if (d < 0) rbound = p; else l = p + 1; } @@ -291,7 +301,8 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable implements ByteOrder, Com i++; continue; } - //acc = ahpla[ac]; - //assert (acc >= 0) : "acc = " + acc + ", a = " + NaturalOrder.arrayList(a, aoffset, al) + "/" + new String(a, aoffset, al) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + NaturalOrder.table(a, 16, aoffset); - //bcc = ahpla[bc]; - //assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset); - //if (acc > bcc) c = 1; - //if (acc < bcc) c = -1; - //assert c != 0; - //assert ab[(ac << 7) | bc] == c; - //return c; return ab[(ac << 7) | bc]; } // they are equal return 0; } - /* - public final int comparePivot(final byte[] compiledPivot, final byte[] b, final int boffset, final int blength) { - assert zero == null; - assert asc; - assert (boffset + blength <= b.length) : "b.length = " + b.length + ", boffset = " + boffset + ", blength = " + blength; - int i = 0; - final int bl = Math.min(blength, b.length - boffset); - byte acc, bcc; - assert boffset >= 0; - assert boffset < b.length; - assert boffset + Math.min(bl, compiledPivot.length) - 1 >= 0; - assert boffset + Math.min(bl, compiledPivot.length) - 1 < b.length; - byte bb; - while ((i < compiledPivot.length) && (i < bl)) { - acc = compiledPivot[i]; - assert boffset + i >= 0; - assert boffset + i < b.length; - bb = b[boffset + i]; - assert bb >= 0; - assert bb < 128; - bcc = ahpla[bb]; - assert (bcc >= 0) : "bcc = " + bcc + ", b = " + NaturalOrder.arrayList(b, boffset, bl) + "/" + new String(b, boffset, bl) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + NaturalOrder.table(b, 16, boffset); - if (acc > bcc) return 1; - if (acc < bcc) return -1; - // else the bytes are equal and it may go on yet undecided - i++; - } - // compare length - if (compiledPivot.length > bl) return 1; - if (compiledPivot.length < bl) return -1; - // they are equal - return 0; - } - public final byte[] compilePivot(final byte[] a, final int aoffset, final int alength) { - assert (aoffset + alength <= a.length) : "a.length = " + a.length + ", aoffset = " + aoffset + ", alength = " + alength; - final byte[] cp = new byte[Math.min(alength, a.length - aoffset)]; - byte aa; - for (int i = cp.length - 1; i >= 0; i--) { - aa = a[aoffset + i]; - assert aa >= 0; - assert aa < 128; - cp[i] = ahpla[aa]; - assert cp[i] != -1; - } - return cp; - } -*/ public static void main(final String[] s) { // java -classpath classes de.anomic.kelondro.kelondroBase64Order final Base64Order b64 = new Base64Order(true, true); diff --git a/source/net/yacy/kelondro/rwi/IODispatcher.java b/source/net/yacy/kelondro/rwi/IODispatcher.java index 9c95a631f..8d755fb10 100644 --- a/source/net/yacy/kelondro/rwi/IODispatcher.java +++ b/source/net/yacy/kelondro/rwi/IODispatcher.java @@ -83,7 +83,7 @@ public class IODispatcher extends Thread { protected synchronized void dump(ReferenceContainerCache cache, File file, ReferenceContainerArray array) { if (dumpQueue == null || controlQueue == null || !this.isAlive()) { Log.logWarning("IODispatcher", "emergency dump of file " + file.getName()); - if (!cache.isEmpty()) cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize)); + if (!cache.isEmpty()) cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize), true); } else { DumpJob job = (DumpJob)new DumpJob(cache, file, array); try { @@ -98,7 +98,7 @@ public class IODispatcher extends Thread { } } catch (InterruptedException e) { Log.logException(e); - cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize)); + cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize), true); } } } @@ -224,7 +224,7 @@ public class IODispatcher extends Thread { } private void dump() { try { - if (!cache.isEmpty()) cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize)); + if (!cache.isEmpty()) cache.dump(file, (int) Math.min(MemoryControl.available() / 3, writeBufferSize), true); array.mountBLOBFile(file); } catch (IOException e) { Log.logException(e); diff --git a/source/net/yacy/kelondro/rwi/IndexCell.java b/source/net/yacy/kelondro/rwi/IndexCell.java index 24be692e9..b09e62dd8 100644 --- a/source/net/yacy/kelondro/rwi/IndexCell.java +++ b/source/net/yacy/kelondro/rwi/IndexCell.java @@ -328,7 +328,7 @@ public final class IndexCell extends AbstractBu * and is composed of the current date and the cell salt */ public synchronized void close() { - if (!this.ram.isEmpty()) this.ram.dump(this.array.newContainerBLOBFile(), (int) Math.min(MemoryControl.available() / 3, writeBufferSize)); + if (!this.ram.isEmpty()) this.ram.dump(this.array.newContainerBLOBFile(), (int) Math.min(MemoryControl.available() / 3, writeBufferSize), true); // close all this.ram.close(); this.array.close(); diff --git a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java index 71221086b..8b7c43680 100644 --- a/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java +++ b/source/net/yacy/kelondro/rwi/ReferenceContainerCache.java @@ -89,7 +89,16 @@ public final class ReferenceContainerCache exte this.cache = null; } - public void dump(final File heapFile, int writeBuffer) { + /** + * dump the cache to a file. This method can be used in a destructive way + * which means that memory can be freed during the dump. This may be important + * because the dump is done in such situations when memory gets low. To get more + * memory during the dump helps to solve tight memory situations. + * @param heapFile + * @param writeBuffer + * @param destructive - if true then the cache is cleaned during the dump causing to free memory + */ + public void dump(final File heapFile, int writeBuffer, boolean destructive) { assert this.cache != null; Log.logInfo("indexContainerRAMHeap", "creating rwi heap dump '" + heapFile.getName() + "', " + cache.size() + " rwi's"); if (heapFile.exists()) FileUtils.deletedelete(heapFile); @@ -127,6 +136,7 @@ public final class ReferenceContainerCache exte } catch (RowSpaceExceededException e) { Log.logException(e); } + if (destructive) container.clear(); // this memory is not needed any more urlcount += container.size(); } wordcount++;