diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index e4b7c8311..761376341 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -61,7 +61,6 @@ import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; -import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; diff --git a/source/de/anomic/kelondro/kelondroAbstractOrder.java b/source/de/anomic/kelondro/kelondroAbstractOrder.java index 613ff48fe..0f30537d3 100644 --- a/source/de/anomic/kelondro/kelondroAbstractOrder.java +++ b/source/de/anomic/kelondro/kelondroAbstractOrder.java @@ -74,6 +74,20 @@ public abstract class kelondroAbstractOrder implements kelondroOrder { throw new IllegalArgumentException("Object type or Object type combination not supported: a=" + a + ", b=" + b); } + public int compare(byte[] a, byte[] b, int boffset, int blength) { + int l = Math.min(a.length, blength); + byte[] bb = new byte[l]; + System.arraycopy(b, boffset, bb, 0, l); + return compare(a, bb); + } + + public int compare(byte[] a, int aoffset, int alength, byte[] b, int boffset, int blength) { + int l = Math.min(alength, blength); + byte[] aa = new byte[l]; + System.arraycopy(a, aoffset, aa, 0, l); + return compare(aa, b, boffset, blength); + } + public byte[] zero() { return zero; } diff --git a/source/de/anomic/kelondro/kelondroCollection.java b/source/de/anomic/kelondro/kelondroCollection.java index f950a377d..c0090e39b 100644 --- a/source/de/anomic/kelondro/kelondroCollection.java +++ b/source/de/anomic/kelondro/kelondroCollection.java @@ -49,26 +49,26 @@ public class kelondroCollection { private byte[] chunkcache; private int chunkcount; private int chunksize; + private int sortbound; private long lastTimeRead, lastTimeWrote; - private String orderkey; + private kelondroOrder order; public kelondroCollection(int objectSize) { this(objectSize, 0, null, new byte[0]); } - public kelondroCollection(int objectSize, int objectCount, String signature, byte[] collectioncache) { - assert (collectioncache.length % objectSize == 0); - assert (objectCount <= collectioncache.length / objectSize); + public kelondroCollection(int objectSize, int objectCount, kelondroOrder ordering, byte[] cache) { this.chunksize = objectSize; - this.chunkcache = collectioncache; + this.chunkcache = cache; this.chunkcount = objectCount; - this.orderkey = signature; // no current ordering + this.order = ordering; + this.sortbound = 0; } private void ensureSize(int elements) { int needed = elements * chunksize; if (chunkcache.length >= needed) return; - byte[] newChunkcache = new byte[needed]; + byte[] newChunkcache = new byte[needed * 2]; System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length); chunkcache = newChunkcache; newChunkcache = null; @@ -117,7 +117,6 @@ public class kelondroCollection { ensureSize(chunkcount + 1); System.arraycopy(a, 0, chunkcache, chunksize * chunkcount, a.length); chunkcount++; - this.orderkey = null; } this.lastTimeWrote = System.currentTimeMillis(); } @@ -142,11 +141,11 @@ public class kelondroCollection { } } - public void remove(byte[] a, Comparator c) { + public void remove(byte[] a, kelondroOrder ko) { // the byte[] a may be shorter than the chunksize if (chunkcount == 0) return; synchronized(chunkcache) { - int p = find(a, c); + int p = find(a); remove(p); } } @@ -156,25 +155,10 @@ public class kelondroCollection { if ((p < 0) || (p >= chunkcount)) return; // out of bounds, nothing to delete System.arraycopy(chunkcache, (p + 1) * chunksize, chunkcache, p * chunksize, (chunkcount - p - 1) * chunksize); chunkcount--; + if (p < sortbound) sortbound--; this.lastTimeWrote = System.currentTimeMillis(); } - private int find(byte[] a) { - // returns the chunknumber - for (int i = 0; i < chunkcount; i++) { - if (match(a, i)) return i; - } - return -1; - } - - private int find(byte[] a, Comparator c) { - // returns the chunknumber - for (int i = 0; i < chunkcount; i++) { - if (compare(a, i, c) == 0) return i; - } - return -1; - } - public void removeAll(kelondroCollection c) { Iterator i = c.elements(); while (i.hasNext()) remove((byte[]) i.next()); @@ -183,14 +167,13 @@ public class kelondroCollection { public void clear() { this.chunkcount = 0; this.chunkcache = new byte[0]; - this.orderkey = null; + this.order = null; } public int size() { return chunkcount; } - public Iterator elements() { // iterates byte[] - objects return new chunkIterator(); } @@ -222,88 +205,151 @@ public class kelondroCollection { } - public String getOrderingSignature() { - return this.orderkey; + public kelondroOrder getOrdering() { + return this.order; } - public int binarySearch(byte[] key, Comparator c) { - assert (this.orderkey != null); + private int find(byte[] a) { + // returns the chunknumber; -1 if not found + + if (this.order == null) return iterativeSearch(a); + + // check if a re-sorting make sense + if (this.chunkcount - this.sortbound > 3000) sort(); + + // first try to find in sorted area + int p = iterativeSearch(a); + if (p >= 0) return p; + + // then find in unsorted area + return binarySearch(a); + + } + + private int iterativeSearch(byte[] key) { + // returns the chunknumber + + if (this.order == null) { + for (int i = this.sortbound; i < this.chunkcount; i++) { + if (match(key, i)) return i; + } + return -1; + } else { + for (int i = this.sortbound; i < this.chunkcount; i++) { + if (compare(key, i) == 0) return i; + } + return -1; + } + } + + private int binarySearch(byte[] key) { + assert (this.order != null); int l = 0; - int r = chunkcount - 1; + int rbound = this.sortbound; int p = 0; int d; - while (l <= r) { - p = (l + r) >> 1; - d = compare(key, p, c); + while (l < rbound) { + p = l + ((rbound - l) >> 1); + d = compare(key, p); if (d == 0) return p; - else if (d < 0) r = p - 1; - else l = ++p; + else if (d < 0) rbound = p; + else l = p + 1; } - return -p - 1; + return -1; } - public void sort(kelondroOrder ko) { - if (this.orderkey == ko.signature()) return; // this is already sorted - qsort(0, chunkcount - 1, (Comparator) ko); - this.orderkey = ko.signature(); + public void sort() { + if (this.sortbound == this.chunkcount) return; // this is already sorted + System.out.println("SORT"); + if (this.sortbound > 1) qsort(0, this.sortbound, this.chunkcount); + else qsort(0, this.chunkcount); + this.sortbound = this.chunkcount; } - public void sort(int fromIndex, int toIndex, Comparator c) { - assert (fromIndex <= toIndex); - assert (fromIndex >= 0); - synchronized(chunkcache) { - qsort(fromIndex, toIndex, c); - } - } - - private void swap(int i, int j) { + private void qsort(int l, int sbound, int rbound) { + //System.out.println("QSORT: chunkcache.length=" + chunkcache.length + ", chunksize=" + chunksize + ", l=" + l + ", sbound=" + sbound + ", rbound=" + rbound); + assert (sbound <= rbound); + if (l >= rbound - 1) return; + + if (rbound - l < 1000) { + isort(l, rbound); + return; + } + + int p = l + ((sbound - l) / 2); + int q = sbound; + int qs = q; byte[] a = new byte[chunksize]; - System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize); - System.arraycopy(chunkcache, chunksize * j , chunkcache, chunksize * i, chunksize); - System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize); - } - - private void isort(int l, int r, Comparator c) { - for (int i = l + 1; i <= r; i++) - for (int j = i; j > l && compare(j - 1, j, c) > 0; j--) - swap(j, j - 1); + try { + System.arraycopy(chunkcache, p * chunksize, a, 0, chunksize); + } catch (ArrayIndexOutOfBoundsException e) { + System.out.println("EXCEPTION: chunkcache.length=" + chunkcache.length + ", p=" + p + ", chunksize=" + chunksize + ", l=" + l + ", sbound=" + sbound + ", rbound=" + rbound); + System.exit(-1); + } + p++; + int ps = p; + while (q < rbound) { + if (compare(a, q) < 1) { + q++; + } else { + swap(p, q); + p++; + q++; + } + } + if (qs < p) qs = p; + if ((ps - l) <= ((p - l) / 2)) qsort(l, p); else qsort(l, ps, p); + if ((qs - p) <= ((q - p) / 2)) qsort(p, q); else qsort(p, qs, q); } - private void qsort(int l, int r, Comparator c) { - if (l >= r) return; + private void qsort(int l, int rbound) { + if (l >= rbound - 1) return; - if (r - l < 10) { - isort(l, r, c); + if (rbound - l < 10) { + isort(l, rbound); return; } int i = l; - int j = r; + int j = rbound - 1; byte[] a = new byte[chunksize]; int pivot = (i + j) / 2; System.arraycopy(chunkcache, pivot * chunksize, a, 0, chunksize); while (i <= j) { - while (compare(a, i, c) == 1) i++; // chunkAt[i] < keybuffer - while (compare(a, j, c) == -1) j--; // chunkAt[j] > keybuffer + while (compare(a, i) == 1) i++; // chunkAt[i] < keybuffer + while (compare(a, j) == -1) j--; // chunkAt[j] > keybuffer if (i <= j) { swap(i, j); i++; j--; } } - qsort(l, j, c); - qsort(i, r, c); + qsort(l, i); + qsort(i, rbound); + } + + private void isort(int l, int rbound) { + for (int i = l + 1; i < rbound; i++) + for (int j = i; j > l && compare(j - 1, j) > 0; j--) + swap(j, j - 1); } - public void uniq(Comparator c) { - assert (this.orderkey != null); + private void swap(int i, int j) { + byte[] a = new byte[chunksize]; + System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize); + System.arraycopy(chunkcache, chunksize * j , chunkcache, chunksize * i, chunksize); + System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize); + } + + public void uniq() { + assert (this.order != null); // removes double-occurrences of chunks // this works only if the collection was ordered with sort before synchronized (chunkcache) { if (chunkcount <= 1) return; int i = 0; while (i < chunkcount - 1) { - if (compare(i, i + 1, c) == 0) { + if (compare(i, i + 1) == 0) { remove(i); } else { i++; @@ -325,59 +371,69 @@ public class kelondroCollection { } public boolean match(byte[] a, int chunknumber) { - if (chunknumber >= chunkcount) - return false; + if (chunknumber >= chunkcount) return false; int i = 0; int p = chunknumber * chunksize; final int len = a.length; - if (len > chunksize) - return false; + if (len > chunksize) return false; while (i < len) - if (a[i++] != chunkcache[p++]) - return false; + if (a[i++] != chunkcache[p++]) return false; return true; } - public int compare(byte[] a, int chunknumber, Comparator c) { - // this can be enhanced + public int compare(byte[] a, int chunknumber) { assert (chunknumber < chunkcount); - byte[] b = new byte[chunksize]; - System.arraycopy(chunkcache, chunknumber * chunksize, b, 0, chunksize); - return c.compare(a, b); + int l = Math.min(a.length, chunksize); + return this.order.compare(a, chunkcache, chunknumber * chunksize, l); } - public int compare(int i, int j, Comparator c) { + public int compare(int i, int j) { // this can be enhanced assert (i < chunkcount); assert (j < chunkcount); byte[] a = new byte[chunksize]; - byte[] b = new byte[chunksize]; System.arraycopy(chunkcache, i * chunksize, a, 0, chunksize); - System.arraycopy(chunkcache, j * chunksize, b, 0, chunksize); - return c.compare(a, b); + return compare(a, j); } - + public static void main(String[] args) { String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; - kelondroCollection c = new kelondroCollection(10); + kelondroCollection c = new kelondroCollection(10, 0, kelondroNaturalOrder.naturalOrder, new byte[0]); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); + c.sort(); c.remove("fuenf".getBytes()); Iterator i = c.elements(); String s; + System.out.print("INPUT-ITERATOR: "); while (i.hasNext()) { s = new String((byte[]) i.next()).trim(); System.out.print(s + ", "); if (s.equals("drei")) i.remove(); } System.out.println(""); - System.out.println(c.toString()); - c.sort(kelondroNaturalOrder.naturalOrder); - System.out.println(c.toString()); - c.uniq(kelondroNaturalOrder.naturalOrder); - System.out.println(c.toString()); + System.out.println("INPUT-TOSTRING: " + c.toString()); + c.sort(); + System.out.println("SORTED : " + c.toString()); + c.uniq(); + System.out.println("UNIQ : " + c.toString()); c.trim(); - System.out.println(c.toString()); + System.out.println("TRIM : " + c.toString()); + c = new kelondroCollection(10, 0, kelondroNaturalOrder.naturalOrder, new byte[0]); + long start = System.currentTimeMillis(); + long t, d = 0; + byte[] w; + for (long k = 0; k < 100000; k++) { + t = System.currentTimeMillis(); + w = ("a" + Long.toString((t % 13775) + k)).getBytes(); + if (c.get(w) == null) c.add(w); else d++; + if (k % 1000 == 0) + System.out.println("added " + k + " entries in " + + ((t - start) / 1000) + " seconds, " + + (((t - start) > 1000) ? (k / ((t - start) / 1000)) : 0) + + " entries/second, " + d + " double, size = " + c.size() + + ", sum = " + (c.size() + d)); + } } } diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 0a3119b58..7b1c767f2 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -124,7 +124,7 @@ public class kelondroCollectionIndex { // define the new storage array byte[][] newarrayrow = new byte[][]{key, kelondroNaturalOrder.encodeLong((long) collection.size(), 4), - collection.getOrderingSignature().getBytes(), + null /*collection.getOrderingSignature().getBytes()*/, collection.toByteArray()}; if (oldindexrow == null) { // the collection is new @@ -194,7 +194,7 @@ public class kelondroCollectionIndex { // read the row and define a collection int chunkcountInArray = (int) arrayrow.getColLongB256(1); if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray); - return new kelondroCollection(chunksize, chunkcount, arrayrow.getColString(2, null), arrayrow.getColBytes(3)); + return new kelondroCollection(chunksize, chunkcount, null /*, arrayrow.getColString(2, null)*/, arrayrow.getColBytes(3)); } public void remove(byte[] key) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroOrder.java b/source/de/anomic/kelondro/kelondroOrder.java index 71715eb0b..27deaff6b 100644 --- a/source/de/anomic/kelondro/kelondroOrder.java +++ b/source/de/anomic/kelondro/kelondroOrder.java @@ -61,6 +61,10 @@ public interface kelondroOrder extends Comparator { public int compare(byte[] a, byte[] b); + public int compare(byte[] a, byte[] b, int boffset, int blength); + + public int compare(byte[] a, int aoffset, int alength, byte[] b, int boffset, int blength); + public byte[] zero(); // returns the zero point of the Ordering; null if not defined public void rotate(byte[] zero); // defines that the ordering rotates, and sets the zero point for the rotation