some work on kelondroCollection, no effect on current YaCy functions

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2180 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 2185a5cfb7
commit 577a6e6487

@ -61,7 +61,6 @@ import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;

@ -74,6 +74,20 @@ public abstract class kelondroAbstractOrder implements kelondroOrder {
throw new IllegalArgumentException("Object type or Object type combination not supported: a=" + a + ", b=" + b); throw new IllegalArgumentException("Object type or Object type combination not supported: a=" + a + ", b=" + b);
} }
// Compares a with a slice of b that starts at boffset.
// The slice holds min(a.length, blength) bytes, i.e. never more bytes than a has;
// the actual ordering decision is delegated to compare(byte[], byte[]).
public int compare(byte[] a, byte[] b, int boffset, int blength) {
    // number of bytes taken out of b
    final int span = (blength < a.length) ? blength : a.length;
    final byte[] slice = new byte[span];
    System.arraycopy(b, boffset, slice, 0, span);
    return compare(a, slice);
}
// Compares a region of a (starting at aoffset) with a region of b (starting at boffset).
// min(alength, blength) bytes are copied out of a and handed, together with the
// unchanged b region, to compare(byte[], byte[], int, int).
// NOTE(review): a region of a that is longer than blength is cut down to blength
// before the comparison - confirm that this is intended.
public int compare(byte[] a, int aoffset, int alength, byte[] b, int boffset, int blength) {
    final int span = (blength < alength) ? blength : alength;
    final byte[] head = new byte[span];
    System.arraycopy(a, aoffset, head, 0, span);
    return compare(head, b, boffset, blength);
}
public byte[] zero() { public byte[] zero() {
return zero; return zero;
} }

@ -49,26 +49,26 @@ public class kelondroCollection {
private byte[] chunkcache; private byte[] chunkcache;
private int chunkcount; private int chunkcount;
private int chunksize; private int chunksize;
private int sortbound;
private long lastTimeRead, lastTimeWrote; private long lastTimeRead, lastTimeWrote;
private String orderkey; private kelondroOrder order;
public kelondroCollection(int objectSize) { public kelondroCollection(int objectSize) {
this(objectSize, 0, null, new byte[0]); this(objectSize, 0, null, new byte[0]);
} }
public kelondroCollection(int objectSize, int objectCount, String signature, byte[] collectioncache) { public kelondroCollection(int objectSize, int objectCount, kelondroOrder ordering, byte[] cache) {
assert (collectioncache.length % objectSize == 0);
assert (objectCount <= collectioncache.length / objectSize);
this.chunksize = objectSize; this.chunksize = objectSize;
this.chunkcache = collectioncache; this.chunkcache = cache;
this.chunkcount = objectCount; this.chunkcount = objectCount;
this.orderkey = signature; // no current ordering this.order = ordering;
this.sortbound = 0;
} }
private void ensureSize(int elements) { private void ensureSize(int elements) {
int needed = elements * chunksize; int needed = elements * chunksize;
if (chunkcache.length >= needed) return; if (chunkcache.length >= needed) return;
byte[] newChunkcache = new byte[needed]; byte[] newChunkcache = new byte[needed * 2];
System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length); System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length);
chunkcache = newChunkcache; chunkcache = newChunkcache;
newChunkcache = null; newChunkcache = null;
@ -117,7 +117,6 @@ public class kelondroCollection {
ensureSize(chunkcount + 1); ensureSize(chunkcount + 1);
System.arraycopy(a, 0, chunkcache, chunksize * chunkcount, a.length); System.arraycopy(a, 0, chunkcache, chunksize * chunkcount, a.length);
chunkcount++; chunkcount++;
this.orderkey = null;
} }
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();
} }
@ -142,11 +141,11 @@ public class kelondroCollection {
} }
} }
public void remove(byte[] a, Comparator c) { public void remove(byte[] a, kelondroOrder ko) {
// the byte[] a may be shorter than the chunksize // the byte[] a may be shorter than the chunksize
if (chunkcount == 0) return; if (chunkcount == 0) return;
synchronized(chunkcache) { synchronized(chunkcache) {
int p = find(a, c); int p = find(a);
remove(p); remove(p);
} }
} }
@ -156,25 +155,10 @@ public class kelondroCollection {
if ((p < 0) || (p >= chunkcount)) return; // out of bounds, nothing to delete if ((p < 0) || (p >= chunkcount)) return; // out of bounds, nothing to delete
System.arraycopy(chunkcache, (p + 1) * chunksize, chunkcache, p * chunksize, (chunkcount - p - 1) * chunksize); System.arraycopy(chunkcache, (p + 1) * chunksize, chunkcache, p * chunksize, (chunkcount - p - 1) * chunksize);
chunkcount--; chunkcount--;
if (p < sortbound) sortbound--;
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();
} }
// Linear scan over all chunks using match(); returns the number of the first
// matching chunk, or -1 if no chunk matches.
private int find(byte[] a) {
    int chunknumber = 0;
    while (chunknumber < chunkcount) {
        if (match(a, chunknumber)) {
            return chunknumber;
        }
        chunknumber++;
    }
    // no chunk matched
    return -1;
}
// Linear scan over all chunks; returns the number of the first chunk for which
// compare(a, chunknumber, c) yields 0, or -1 if there is no such chunk.
private int find(byte[] a, Comparator c) {
    int chunknumber = 0;
    while (chunknumber < chunkcount) {
        if (compare(a, chunknumber, c) == 0) {
            return chunknumber;
        }
        chunknumber++;
    }
    // nothing compared as equal
    return -1;
}
public void removeAll(kelondroCollection c) { public void removeAll(kelondroCollection c) {
Iterator i = c.elements(); Iterator i = c.elements();
while (i.hasNext()) remove((byte[]) i.next()); while (i.hasNext()) remove((byte[]) i.next());
@ -183,14 +167,13 @@ public class kelondroCollection {
public void clear() { public void clear() {
this.chunkcount = 0; this.chunkcount = 0;
this.chunkcache = new byte[0]; this.chunkcache = new byte[0];
this.orderkey = null; this.order = null;
} }
public int size() { public int size() {
return chunkcount; return chunkcount;
} }
public Iterator elements() { // iterates byte[] - objects public Iterator elements() { // iterates byte[] - objects
return new chunkIterator(); return new chunkIterator();
} }
@ -222,88 +205,151 @@ public class kelondroCollection {
} }
public String getOrderingSignature() { public kelondroOrder getOrdering() {
return this.orderkey; return this.order;
} }
// Looks up a by combining a linear scan (iterativeSearch) with a binary search (binarySearch).
public int binarySearch(byte[] key, Comparator c) { private int find(byte[] a) {
assert (this.orderkey != null); // returns the chunknumber; -1 if not found
// without an ordering only the linear scan is available
if (this.order == null) return iterativeSearch(a);
// re-sort as soon as more than 3000 chunks lie at or beyond sortbound
if (this.chunkcount - this.sortbound > 3000) sort();
// first scan linearly through the chunks from sortbound up to chunkcount (not yet sorted)
int p = iterativeSearch(a);
if (p >= 0) return p;
// then binary-search the chunks below sortbound (the sorted area)
return binarySearch(a);
}
// Linear scan over the chunks from sortbound (inclusive) to chunkcount (exclusive).
// Without an ordering a chunk is a hit when match() accepts it; with an ordering
// a chunk is a hit when compare() yields 0.
// Returns the chunknumber of the first hit, or -1 if there is none.
private int iterativeSearch(byte[] key) {
    // decided once, before the scan starts
    final boolean useMatch = (this.order == null);
    int i = this.sortbound;
    while (i < this.chunkcount) {
        final boolean hit = useMatch ? match(key, i) : (compare(key, i) == 0);
        if (hit) {
            return i;
        }
        i++;
    }
    return -1;
}
private int binarySearch(byte[] key) {
assert (this.order != null);
int l = 0; int l = 0;
int r = chunkcount - 1; int rbound = this.sortbound;
int p = 0; int p = 0;
int d; int d;
while (l <= r) { while (l < rbound) {
p = (l + r) >> 1; p = l + ((rbound - l) >> 1);
d = compare(key, p, c); d = compare(key, p);
if (d == 0) return p; if (d == 0) return p;
else if (d < 0) r = p - 1; else if (d < 0) rbound = p;
else l = ++p; else l = p + 1;
} }
return -p - 1; return -1;
} }
public void sort(kelondroOrder ko) { public void sort() {
if (this.orderkey == ko.signature()) return; // this is already sorted if (this.sortbound == this.chunkcount) return; // this is already sorted
qsort(0, chunkcount - 1, (Comparator) ko); System.out.println("SORT");
this.orderkey = ko.signature(); if (this.sortbound > 1) qsort(0, this.sortbound, this.chunkcount);
else qsort(0, this.chunkcount);
this.sortbound = this.chunkcount;
} }
public void sort(int fromIndex, int toIndex, Comparator c) { private void qsort(int l, int sbound, int rbound) {
assert (fromIndex <= toIndex); //System.out.println("QSORT: chunkcache.length=" + chunkcache.length + ", chunksize=" + chunksize + ", l=" + l + ", sbound=" + sbound + ", rbound=" + rbound);
assert (fromIndex >= 0); assert (sbound <= rbound);
synchronized(chunkcache) { if (l >= rbound - 1) return;
qsort(fromIndex, toIndex, c);
} if (rbound - l < 1000) {
} isort(l, rbound);
return;
private void swap(int i, int j) { }
int p = l + ((sbound - l) / 2);
int q = sbound;
int qs = q;
byte[] a = new byte[chunksize]; byte[] a = new byte[chunksize];
System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize); try {
System.arraycopy(chunkcache, chunksize * j , chunkcache, chunksize * i, chunksize); System.arraycopy(chunkcache, p * chunksize, a, 0, chunksize);
System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize); } catch (ArrayIndexOutOfBoundsException e) {
} System.out.println("EXCEPTION: chunkcache.length=" + chunkcache.length + ", p=" + p + ", chunksize=" + chunksize + ", l=" + l + ", sbound=" + sbound + ", rbound=" + rbound);
System.exit(-1);
private void isort(int l, int r, Comparator c) { }
for (int i = l + 1; i <= r; i++) p++;
for (int j = i; j > l && compare(j - 1, j, c) > 0; j--) int ps = p;
swap(j, j - 1); while (q < rbound) {
if (compare(a, q) < 1) {
q++;
} else {
swap(p, q);
p++;
q++;
}
}
if (qs < p) qs = p;
if ((ps - l) <= ((p - l) / 2)) qsort(l, p); else qsort(l, ps, p);
if ((qs - p) <= ((q - p) / 2)) qsort(p, q); else qsort(p, qs, q);
} }
private void qsort(int l, int r, Comparator c) { private void qsort(int l, int rbound) {
if (l >= r) return; if (l >= rbound - 1) return;
if (r - l < 10) { if (rbound - l < 10) {
isort(l, r, c); isort(l, rbound);
return; return;
} }
int i = l; int i = l;
int j = r; int j = rbound - 1;
byte[] a = new byte[chunksize]; byte[] a = new byte[chunksize];
int pivot = (i + j) / 2; int pivot = (i + j) / 2;
System.arraycopy(chunkcache, pivot * chunksize, a, 0, chunksize); System.arraycopy(chunkcache, pivot * chunksize, a, 0, chunksize);
while (i <= j) { while (i <= j) {
while (compare(a, i, c) == 1) i++; // chunkAt[i] < keybuffer while (compare(a, i) == 1) i++; // chunkAt[i] < keybuffer
while (compare(a, j, c) == -1) j--; // chunkAt[j] > keybuffer while (compare(a, j) == -1) j--; // chunkAt[j] > keybuffer
if (i <= j) { if (i <= j) {
swap(i, j); swap(i, j);
i++; i++;
j--; j--;
} }
} }
qsort(l, j, c); qsort(l, i);
qsort(i, r, c); qsort(i, rbound);
}
private void isort(int l, int rbound) {
for (int i = l + 1; i < rbound; i++)
for (int j = i; j > l && compare(j - 1, j) > 0; j--)
swap(j, j - 1);
} }
public void uniq(Comparator c) { private void swap(int i, int j) {
assert (this.orderkey != null); byte[] a = new byte[chunksize];
System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize);
System.arraycopy(chunkcache, chunksize * j , chunkcache, chunksize * i, chunksize);
System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize);
}
public void uniq() {
assert (this.order != null);
// removes double-occurrences of chunks // removes double-occurrences of chunks
// this works only if the collection was ordered with sort before // this works only if the collection was ordered with sort before
synchronized (chunkcache) { synchronized (chunkcache) {
if (chunkcount <= 1) return; if (chunkcount <= 1) return;
int i = 0; int i = 0;
while (i < chunkcount - 1) { while (i < chunkcount - 1) {
if (compare(i, i + 1, c) == 0) { if (compare(i, i + 1) == 0) {
remove(i); remove(i);
} else { } else {
i++; i++;
@ -325,59 +371,69 @@ public class kelondroCollection {
} }
public boolean match(byte[] a, int chunknumber) { public boolean match(byte[] a, int chunknumber) {
if (chunknumber >= chunkcount) if (chunknumber >= chunkcount) return false;
return false;
int i = 0; int i = 0;
int p = chunknumber * chunksize; int p = chunknumber * chunksize;
final int len = a.length; final int len = a.length;
if (len > chunksize) if (len > chunksize) return false;
return false;
while (i < len) while (i < len)
if (a[i++] != chunkcache[p++]) if (a[i++] != chunkcache[p++]) return false;
return false;
return true; return true;
} }
public int compare(byte[] a, int chunknumber, Comparator c) { public int compare(byte[] a, int chunknumber) {
// this can be enhanced
assert (chunknumber < chunkcount); assert (chunknumber < chunkcount);
byte[] b = new byte[chunksize]; int l = Math.min(a.length, chunksize);
System.arraycopy(chunkcache, chunknumber * chunksize, b, 0, chunksize); return this.order.compare(a, chunkcache, chunknumber * chunksize, l);
return c.compare(a, b);
} }
public int compare(int i, int j, Comparator c) { public int compare(int i, int j) {
// this can be enhanced // this can be enhanced
assert (i < chunkcount); assert (i < chunkcount);
assert (j < chunkcount); assert (j < chunkcount);
byte[] a = new byte[chunksize]; byte[] a = new byte[chunksize];
byte[] b = new byte[chunksize];
System.arraycopy(chunkcache, i * chunksize, a, 0, chunksize); System.arraycopy(chunkcache, i * chunksize, a, 0, chunksize);
System.arraycopy(chunkcache, j * chunksize, b, 0, chunksize); return compare(a, j);
return c.compare(a, b);
} }
public static void main(String[] args) { public static void main(String[] args) {
String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" };
kelondroCollection c = new kelondroCollection(10); kelondroCollection c = new kelondroCollection(10, 0, kelondroNaturalOrder.naturalOrder, new byte[0]);
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
c.sort();
c.remove("fuenf".getBytes()); c.remove("fuenf".getBytes());
Iterator i = c.elements(); Iterator i = c.elements();
String s; String s;
System.out.print("INPUT-ITERATOR: ");
while (i.hasNext()) { while (i.hasNext()) {
s = new String((byte[]) i.next()).trim(); s = new String((byte[]) i.next()).trim();
System.out.print(s + ", "); System.out.print(s + ", ");
if (s.equals("drei")) i.remove(); if (s.equals("drei")) i.remove();
} }
System.out.println(""); System.out.println("");
System.out.println(c.toString()); System.out.println("INPUT-TOSTRING: " + c.toString());
c.sort(kelondroNaturalOrder.naturalOrder); c.sort();
System.out.println(c.toString()); System.out.println("SORTED : " + c.toString());
c.uniq(kelondroNaturalOrder.naturalOrder); c.uniq();
System.out.println(c.toString()); System.out.println("UNIQ : " + c.toString());
c.trim(); c.trim();
System.out.println(c.toString()); System.out.println("TRIM : " + c.toString());
c = new kelondroCollection(10, 0, kelondroNaturalOrder.naturalOrder, new byte[0]);
long start = System.currentTimeMillis();
long t, d = 0;
byte[] w;
for (long k = 0; k < 100000; k++) {
t = System.currentTimeMillis();
w = ("a" + Long.toString((t % 13775) + k)).getBytes();
if (c.get(w) == null) c.add(w); else d++;
if (k % 1000 == 0)
System.out.println("added " + k + " entries in " +
((t - start) / 1000) + " seconds, " +
(((t - start) > 1000) ? (k / ((t - start) / 1000)) : 0) +
" entries/second, " + d + " double, size = " + c.size() +
", sum = " + (c.size() + d));
}
} }
} }

@ -124,7 +124,7 @@ public class kelondroCollectionIndex {
// define the new storage array // define the new storage array
byte[][] newarrayrow = new byte[][]{key, byte[][] newarrayrow = new byte[][]{key,
kelondroNaturalOrder.encodeLong((long) collection.size(), 4), kelondroNaturalOrder.encodeLong((long) collection.size(), 4),
collection.getOrderingSignature().getBytes(), null /*collection.getOrderingSignature().getBytes()*/,
collection.toByteArray()}; collection.toByteArray()};
if (oldindexrow == null) { if (oldindexrow == null) {
// the collection is new // the collection is new
@ -194,7 +194,7 @@ public class kelondroCollectionIndex {
// read the row and define a collection // read the row and define a collection
int chunkcountInArray = (int) arrayrow.getColLongB256(1); int chunkcountInArray = (int) arrayrow.getColLongB256(1);
if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray); if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray);
return new kelondroCollection(chunksize, chunkcount, arrayrow.getColString(2, null), arrayrow.getColBytes(3)); return new kelondroCollection(chunksize, chunkcount, null /*, arrayrow.getColString(2, null)*/, arrayrow.getColBytes(3));
} }
public void remove(byte[] key) throws IOException { public void remove(byte[] key) throws IOException {

@ -61,6 +61,10 @@ public interface kelondroOrder extends Comparator {
public int compare(byte[] a, byte[] b); public int compare(byte[] a, byte[] b);
public int compare(byte[] a, byte[] b, int boffset, int blength);
public int compare(byte[] a, int aoffset, int alength, byte[] b, int boffset, int blength);
public byte[] zero(); // returns the zero point of the Ordering; null if not defined public byte[] zero(); // returns the zero point of the Ordering; null if not defined
public void rotate(byte[] zero); // defines that the ordering rotates, and sets the zero point for the rotation public void rotate(byte[] zero); // defines that the ordering rotates, and sets the zero point for the rotation

Loading…
Cancel
Save