diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 2fbd8de3b..c9579c5c0 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -60,7 +60,7 @@ public class kelondroBytesIntMap extends kelondroRowBufferedSet { if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim(); return -1; } - kelondroRow.Entry indexentry = remove(key); + kelondroRow.Entry indexentry = removeMarked(key); if (indexentry == null) return -1; return (int) indexentry.getColLongB256(1); } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index d2eed8b6e..e42450ab6 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -54,7 +54,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr if ((i % 10000) == 0) System.out.print('.'); } this.index.setOrdering(kelondroNaturalOrder.naturalOrder, 0); - index.sort(); + index.shape(); System.out.println(index.size() + " index entries initialized and sorted"); } diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index 952647522..1bb582e01 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -60,7 +60,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet { if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim(); return null; } - kelondroRow.Entry indexentry = super.remove(kelondroNaturalOrder.encodeLong((long) ii, 4)); + kelondroRow.Entry indexentry = super.removeMarked(kelondroNaturalOrder.encodeLong((long) ii, 4)); if (indexentry == null) return null; return indexentry.getColBytes(1); } diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 682cd64b5..4e216b32c 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -81,7 +81,7 @@ public class kelondroRecords { // constants private static final int NUL = Integer.MIN_VALUE; // the meta value for the kelondroRecords' NUL abstraction - private static final long memBlock = 500000; // do not fill cache further if the amount of available memory is less that this + private static final long memBlock = 50000000; // do not fill cache further if the amount of available memory is less that this public final static boolean useWriteBuffer = false; // memory calculation @@ -427,9 +427,9 @@ public class kelondroRecords { this.cacheScore = new kelondroMScoreCluster(); // cache control of CP_HIGH caches } this.cacheHeaders = new kelondroIntBytesMap[]{ - new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 2), new kelondroIntBytesMap(this.headchunksize, 0), - new kelondroIntBytesMap(this.headchunksize, 0) + new kelondroIntBytesMap(this.headchunksize, 0), + new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 2) }; this.cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0); this.cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0); @@ -489,6 +489,20 @@ public class kelondroRecords { }; } + public String cacheNodeStatusString() { + return + "cacheMaxSize=" + cacheSize + + ", CP_HIGH=" + cacheHeaders[CP_HIGH].size() + + ", CP_MEDIUM=" + cacheHeaders[CP_MEDIUM].size() + + ", CP_LOW=" + cacheHeaders[CP_LOW].size() + + ", readHit=" + readHit + + ", readMiss=" + readMiss + + ", writeUnique=" + writeUnique + + ", writeDouble=" + writeDouble + + ", cacheDelete=" + cacheDelete + + ", cacheFlush=" + cacheFlush; + } + private static int[] cacheCombinedStatus(int[] a, int[] b) { int[] c = new int[a.length]; for (int i = a.length - 1; i >= 0; i--) c[i] = a[i] + b[i]; @@ -916,9 +930,9 @@ public class kelondroRecords { // we simply clear the cache String error = "cachScore error: " + e.getMessage() + "; cachesize=" + cacheSize + ", cache.size()=[" + cacheHeaders[0].size() + "," + cacheHeaders[1].size() + "," + cacheHeaders[2].size() + "], cacheScore.size()=" + cacheScore.size(); cacheScore = new kelondroMScoreCluster(); - cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, cacheSize / 2); + cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, 0); cacheHeaders[CP_MEDIUM] = new kelondroIntBytesMap(headchunksize, 0); - cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, 0); + cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, cacheSize / 2); cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0); cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0); cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0); diff --git a/source/de/anomic/kelondro/kelondroRowBufferedSet.java b/source/de/anomic/kelondro/kelondroRowBufferedSet.java index b4e9e60f6..f5240259e 100644 --- a/source/de/anomic/kelondro/kelondroRowBufferedSet.java +++ b/source/de/anomic/kelondro/kelondroRowBufferedSet.java @@ -164,13 +164,13 @@ public class kelondroRowBufferedSet extends kelondroRowSet { } } - public kelondroRow.Entry remove(byte[] a) { + public kelondroRow.Entry removeShift(byte[] a) { synchronized (buffer) { if (useRowCollection) { kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a); if (oldentry == null) { // try the collection - return super.remove(a); + return super.removeShift(a); } else { // the entry was in buffer return oldentry; @@ -181,11 +181,28 @@ public class kelondroRowBufferedSet extends kelondroRowSet { } } - public void removeAll(kelondroRowCollection c) { + public kelondroRow.Entry removeMarked(byte[] a) { + synchronized (buffer) { + if (useRowCollection) { + kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a); + if (oldentry == null) { + // try the collection + return super.removeMarked(a); + } else { + // the entry was in buffer + return oldentry; + } + } else { + return (kelondroRow.Entry) buffer.remove(a); // test + } + } + } + + public void removeMarkedAll(kelondroRowCollection c) { // this can be enhanced synchronized (buffer) { flush(); - super.removeAll(c); + super.removeMarkedAll(c); } } @@ -195,8 +212,8 @@ public class kelondroRowBufferedSet extends kelondroRowSet { c.setOrdering(kelondroNaturalOrder.naturalOrder, 0); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); - c.sort(); - c.remove("fuenf".getBytes()); + c.removeMarked("fuenf".getBytes()); + c.shape(); Iterator i = c.elements(); String s; System.out.print("INPUT-ITERATOR: "); @@ -207,7 +224,7 @@ public class kelondroRowBufferedSet extends kelondroRowSet { } System.out.println(""); System.out.println("INPUT-TOSTRING: " + c.toString()); - c.sort(); + c.shape(); System.out.println("SORTED : " + c.toString()); c.uniq(); System.out.println("UNIQ : " + c.toString()); @@ -232,7 +249,7 @@ public class kelondroRowBufferedSet extends kelondroRowSet { " entries/second, size = " + c.size()); } System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); - c.sort(); + c.shape(); System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); c.uniq(); System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index d24da88e3..e3e610766 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -145,13 +145,18 @@ public class kelondroRowCollection { } } - public final void remove(int p) { + protected final void removeShift(int pos, int dist, int upBound) { + System.arraycopy(chunkcache, (pos + dist) * rowdef.objectsize(), + chunkcache, pos * rowdef.objectsize(), + (upBound - pos - dist) * rowdef.objectsize()); + if ((pos < sortBound) && (upBound >= sortBound)) sortBound -= dist; + } + + public final void removeShift(int p) { assert ((p >= 0) && (p < chunkcount) && (chunkcount > 0)); //System.out.println("REMOVE at pos " + p + ", chunkcount=" + chunkcount + ", sortBound=" + sortBound); synchronized (chunkcache) { - System.arraycopy(chunkcache, (p + 1) * rowdef.objectsize(), chunkcache, p * rowdef.objectsize(), (chunkcount - p - 1) * rowdef.objectsize()); - chunkcount--; - if (p < sortBound) sortBound--; + removeShift(p, 1, chunkcount--); } this.lastTimeWrote = System.currentTimeMillis(); } @@ -299,7 +304,7 @@ public class kelondroRowCollection { swap(j, j - 1, 0); } - private final int swap(int i, int j, int p) { + protected final int swap(int i, int j, int p) { if (i == j) return p; if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) { // there is space in the chunkcache that we can use as buffer @@ -326,7 +331,7 @@ public class kelondroRowCollection { while (i < chunkcount - 1) { if (compare(i, i + 1) == 0) { //System.out.println("DOUBLE: " + new String(this.chunkcache, this.chunksize * i, this.chunksize)); - remove(i); + removeShift(i); } else { i++; } @@ -368,5 +373,5 @@ public class kelondroRowCollection { */ return c; } - + } diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 56cf54d2a..8ee362a22 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -24,19 +24,25 @@ package de.anomic.kelondro; +import java.util.TreeSet; import java.util.Iterator; import java.util.Random; public class kelondroRowSet extends kelondroRowCollection { private static final int collectionReSortLimit = 90; + private static final int removeMaxSize = 100; + + private TreeSet removeMarker; public kelondroRowSet(kelondroRow rowdef) { super(rowdef); + this.removeMarker = new TreeSet(); } public kelondroRowSet(kelondroRow rowdef, int objectCount) { super(rowdef, objectCount); + this.removeMarker = new TreeSet(); } public kelondroRow.Entry get(byte[] key) { @@ -45,17 +51,24 @@ public class kelondroRowSet extends kelondroRowCollection { private kelondroRow.Entry get(byte[] key, int astart, int alength) { synchronized (chunkcache) { - int i = find(key, astart, alength); - if (i >= 0) return get(i); + int index = find(key, astart, alength); + if ((index < 0) || (isMarkedRemoved(index))){ + return null; + } else { + return get(index); + } } - return null; } public kelondroRow.Entry put(kelondroRow.Entry entry) { int index = -1; synchronized (chunkcache) { index = find(entry.bytes(), super.rowdef.colstart[super.sortColumn], super.rowdef.width(super.sortColumn)); - if (index < 0) { + if (isMarkedRemoved(index)) { + set(index, entry); + removeMarker.remove(new Integer(index)); + return null; + } else if (index < 0) { add(entry); return null; } else { @@ -66,11 +79,92 @@ public class kelondroRowSet extends kelondroRowCollection { } } - public kelondroRow.Entry remove(byte[] a) { - return remove(a, 0, a.length); + public int size() { + return super.size() - removeMarker.size(); + } + + public kelondroRow.Entry removeMarked(byte[] a) { + return removeMarked(a, 0, a.length); + } + + private kelondroRow.Entry removeMarked(byte[] a, int astart, int alength) { + // the byte[] a may be shorter than the chunksize + if (chunkcount == 0) return null; + kelondroRow.Entry b = null; + synchronized(chunkcache) { + int p = find(a, astart, alength); + if (p < 0) return null; + b = get(p); + if (p < sortBound) { + removeMarker.add(new Integer(p)); + } else { + super.swap(p, --chunkcount, 0); + } + } + if (removeMarker.size() == chunkcount) { + chunkcount = 0; + sortBound = 0; + removeMarker.clear(); + } + if (removeMarker.size() >= removeMaxSize) resolveMarkedRemoved(); + return b; + } + + private boolean isMarkedRemoved(int index) { + return removeMarker.contains(new Integer(index)); + } + + public void shape() { + //System.out.println("SHAPE"); + synchronized (chunkcache) { + resolveMarkedRemoved(); + super.sort(); + } + } + + /* + private void resolveMarkedRemoved1() { + //long start = System.currentTimeMillis(); + //int c = removeMarker.size(); + Integer idx = new Integer(sortBound); + while (removeMarker.size() > 0) { + idx = (Integer) removeMarker.last(); + removeMarker.remove(idx); + chunkcount--; + if (idx.intValue() < chunkcount) { + super.swap(idx.intValue(), chunkcount, 0); + } + } + if (idx.intValue() < sortBound) sortBound = idx.intValue(); + removeMarker.clear(); + //System.out.println("RESOLVED " + c + " entries in " + (System.currentTimeMillis() - start) + " milliseconds"); + } + */ + + private void resolveMarkedRemoved() { + if (removeMarker.size() == 0) return; + Integer nxt = (Integer) removeMarker.first(); + removeMarker.remove(nxt); + int idx = nxt.intValue(); + int d = 1; + while (removeMarker.size() > 0) { + nxt = (Integer) removeMarker.first(); + removeMarker.remove(nxt); + super.removeShift(idx, d, nxt.intValue()); + idx = nxt.intValue() - d; + d++; + } + super.removeShift(idx, d, chunkcount); + chunkcount -= d; + removeMarker.clear(); + } + + + protected kelondroRow.Entry removeShift(byte[] a) { + return removeShift(a, 0, a.length); } - private kelondroRow.Entry remove(byte[] a, int astart, int alength) { + private kelondroRow.Entry removeShift(byte[] a, int astart, int alength) { // the byte[] a may be shorter than the chunksize if (chunkcount == 0) return null; kelondroRow.Entry b = null; @@ -78,17 +172,21 @@ public class kelondroRowSet extends kelondroRowCollection { int p = find(a, astart, alength); if (p < 0) return null; b = get(p); - remove(p); + if (p < sortBound) { + removeShift(p); + } else { + super.swap(p, --chunkcount, 0); + } } return b; } - public void removeAll(kelondroRowCollection c) { + public void removeMarkedAll(kelondroRowCollection c) { Iterator i = c.elements(); byte[] b; while (i.hasNext()) { b = (byte[]) i.next(); - remove(b, 0, b.length); + removeMarked(b, 0, b.length); } } @@ -113,7 +211,7 @@ public class kelondroRowSet extends kelondroRowCollection { if (this.sortOrder == null) return iterativeSearch(a, astart, alength); // check if a re-sorting make sense - if ((this.chunkcount - this.sortBound) > collectionReSortLimit) sort(); + if ((this.chunkcount - this.sortBound) > collectionReSortLimit) shape(); // first try to find in sorted area int p = binarySearch(a, astart, alength); @@ -177,8 +275,8 @@ public class kelondroRowSet extends kelondroRowCollection { c.setOrdering(kelondroNaturalOrder.naturalOrder, 0); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); - c.sort(); - c.remove("fuenf".getBytes(), 0, 5); + c.shape(); + c.removeMarked("fuenf".getBytes(), 0, 5); Iterator i = c.elements(); String s; System.out.print("INPUT-ITERATOR: "); @@ -189,7 +287,7 @@ public class kelondroRowSet extends kelondroRowCollection { } System.out.println(""); System.out.println("INPUT-TOSTRING: " + c.toString()); - c.sort(); + c.shape(); System.out.println("SORTED : " + c.toString()); c.uniq(); System.out.println("UNIQ : " + c.toString()); @@ -214,7 +312,7 @@ public class kelondroRowSet extends kelondroRowCollection { " entries/second, size = " + c.size()); } System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); - c.sort(); + c.shape(); System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); c.uniq(); System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");