From c51603a4057ee82a6dc2b5c8d30af90273479b51 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 1 Mar 2006 00:25:02 +0000 Subject: [PATCH] added two new kelondro classes that will handle the new index data structures (not used yet) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1789 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../anomic/kelondro/kelondroCollection.java | 355 ++++++++++++++++++ .../kelondro/kelondroCollectionIndex.java | 222 +++++++++++ .../kelondroOutOfLimitsException.java | 56 +++ 3 files changed, 633 insertions(+) create mode 100644 source/de/anomic/kelondro/kelondroCollection.java create mode 100644 source/de/anomic/kelondro/kelondroCollectionIndex.java create mode 100644 source/de/anomic/kelondro/kelondroOutOfLimitsException.java diff --git a/source/de/anomic/kelondro/kelondroCollection.java b/source/de/anomic/kelondro/kelondroCollection.java new file mode 100644 index 000000000..551518225 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroCollection.java @@ -0,0 +1,355 @@ +// kelondroCollection.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2006 +// created: 12.01.2006 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.util.Comparator; +import java.util.Iterator; + +public class kelondroCollection { + + private byte[] chunkcache; + private int chunkcount; + private int chunksize; + private long lastTimeRead, lastTimeWrote; + private String orderkey; + + public kelondroCollection(int objectSize) { + this(objectSize, 0, null, new byte[0]); + } + + public kelondroCollection(int objectSize, int objectCount, String signature, byte[] collectioncache) { + assert (collectioncache.length % objectSize == 0); + assert (objectCount <= collectioncache.length / objectSize); + this.chunksize = objectSize; + this.chunkcache = collectioncache; + this.chunkcount = objectCount; + this.orderkey = signature; // no current ordering + } + + private void ensureSize(int elements) { + int needed = elements * chunksize; + if (chunkcache.length >= needed) return; + byte[] newChunkcache = new byte[needed]; + System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length); + chunkcache = newChunkcache; + newChunkcache = null; + } + + public void trim() { + synchronized (chunkcache) { + int needed = chunkcount * chunksize; + if (chunkcache.length == needed) return; + byte[] newChunkcache = new byte[needed]; + System.arraycopy(chunkcache, 0, newChunkcache, 0, newChunkcache.length); + chunkcache = newChunkcache; + newChunkcache = null; + } + } + + public void add(byte[] a) { + assert (a.length <= chunksize); + synchronized (chunkcache) { + ensureSize(chunkcount + 1); + System.arraycopy(a, 0, chunkcache, chunksize * chunkcount, a.length); + chunkcount++; + this.orderkey = null; + } + } + + public void addAll(kelondroCollection c) { + assert(this.chunksize >= c.chunksize); + synchronized(chunkcache) { + ensureSize(chunkcount + c.size()); + } + Iterator i = c.elements(); + while (i.hasNext()) { + add((byte[]) i.next()); + } + } + + public void remove(byte[] a) { + // the byte[] a may be shorter than the chunksize + if (chunkcount == 0) return; + synchronized(chunkcache) { + int p = find(a); + remove(p); + } + } + + public void remove(byte[] a, Comparator c) { + // the byte[] a may be shorter than the chunksize + if (chunkcount == 0) return; + synchronized(chunkcache) { + int p = find(a, c); + remove(p); + } + } + + private void remove(int p) { + if (chunkcount == 0) return; + if ((p < 0) || (p >= chunkcount)) return; // out of bounds, nothing to delete + System.arraycopy(chunkcache, (p + 1) * chunksize, chunkcache, p * chunksize, (chunkcount - p - 1) * chunksize); + chunkcount--; + } + + private int find(byte[] a) { + // returns the chunknumber + for (int i = 0; i < chunkcount; i++) { + if (match(a, i)) return i; + } + return -1; + } + + private int find(byte[] a, Comparator c) { + // returns the chunknumber + for (int i = 0; i < chunkcount; i++) { + if (compare(a, i, c) == 0) return i; + } + return -1; + } + + public void removeAll(kelondroCollection c) { + Iterator i = c.elements(); + while (i.hasNext()) remove((byte[]) i.next()); + } + + public void clear() { + this.chunkcount = 0; + this.chunkcache = new byte[0]; + this.orderkey = null; + } + + public int size() { + return chunkcount; + } + + + public Iterator elements() { // iterates byte[] - objects + return new chunkIterator(); + } + + public class chunkIterator implements Iterator { + + int c = 0; + + public chunkIterator() { + c = 0; + } + + public boolean hasNext() { + return c < chunkcount; + } + + public Object next() { + byte[] chunk = new byte[chunksize]; + System.arraycopy(chunkcache, c * chunksize, chunk, 0, chunksize); + c++; + return chunk; + } + + public void remove() { + c--; + System.arraycopy(chunkcache, (c + 1) * chunksize, chunkcache, c * chunksize, (chunkcount - c - 1) * chunksize); + chunkcount--; + } + + } + + public String getOrderingSignature() { + return this.orderkey; + } + + public int binarySearch(byte[] key, Comparator c) { + assert (this.orderkey != null); + int l = 0; + int r = chunkcount - 1; + int p = 0; + int d; + while (l <= r) { + p = (l + r) >> 1; + d = compare(key, p, c); + if (d == 0) return p; + else if (d < 0) r = p - 1; + else l = ++p; + } + return -p - 1; + } + + public void sort(kelondroOrder ko) { + if (this.orderkey == ko.signature()) return; // this is already sorted + qsort(0, chunkcount - 1, (Comparator) ko); + this.orderkey = ko.signature(); + } + + public void sort(int fromIndex, int toIndex, Comparator c) { + assert (fromIndex <= toIndex); + assert (fromIndex >= 0); + synchronized(chunkcache) { + qsort(fromIndex, toIndex, c); + } + } + + private void swap(int i, int j) { + byte[] a = new byte[chunksize]; + System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize); + System.arraycopy(chunkcache, chunksize * j , chunkcache, chunksize * i, chunksize); + System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize); + } + + private void isort(int l, int r, Comparator c) { + for (int i = l + 1; i <= r; i++) + for (int j = i; j > l && compare(j - 1, j, c) > 0; j--) + swap(j, j - 1); + } + + private void qsort(int l, int r, Comparator c) { + if (l >= r) return; + + if (r - l < 10) { + isort(l, r, c); + return; + } + + int i = l; + int j = r; + byte[] a = new byte[chunksize]; + int pivot = (i + j) / 2; + System.arraycopy(chunkcache, pivot * chunksize, a, 0, chunksize); + while (i <= j) { + while (compare(a, i, c) == 1) i++; // chunkAt[i] < keybuffer + while (compare(a, j, c) == -1) j--; // chunkAt[j] > keybuffer + if (i <= j) { + swap(i, j); + i++; + j--; + } + } + qsort(l, j, c); + qsort(i, r, c); + } + + public void uniq(Comparator c) { + assert (this.orderkey != null); + // removes double-occurrences of chunks + // this works only if the collection was ordered with sort before + synchronized (chunkcache) { + if (chunkcount <= 1) return; + int i = 0; + while (i < chunkcount - 1) { + if (compare(i, i + 1, c) == 0) { + remove(i); + } else { + i++; + } + } + } + } + + public String toString() { + StringBuffer s = new StringBuffer(); + Iterator i = elements(); + if (i.hasNext()) s.append(new String((byte[]) i.next()).trim()); + while (i.hasNext()) s.append(", " + new String((byte[]) i.next()).trim()); + return new String(s); + } + + public byte[] toByteArray() { + return this.chunkcache; + } + + public boolean match(byte[] a, int chunknumber) { + if (chunknumber >= chunkcount) + return false; + int i = 0; + int p = chunknumber * chunksize; + final int len = a.length; + if (len > chunksize) + return false; + while (i < len) + if (a[i++] != chunkcache[p++]) + return false; + return true; + } + + public int compare(byte[] a, int chunknumber, Comparator c) { + // this can be enhanced + assert (chunknumber < chunkcount); + byte[] b = new byte[chunksize]; + System.arraycopy(chunkcache, chunknumber * chunksize, b, 0, chunksize); + return c.compare(a, b); + } + + public int compare(int i, int j, Comparator c) { + // this can be enhanced + assert (i < chunkcount); + assert (j < chunkcount); + byte[] a = new byte[chunksize]; + byte[] b = new byte[chunksize]; + System.arraycopy(chunkcache, i * chunksize, a, 0, chunksize); + System.arraycopy(chunkcache, j * chunksize, b, 0, chunksize); + return c.compare(a, b); + } + + public static void main(String[] args) { + String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; + kelondroCollection c = new kelondroCollection(10); + for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); + for (int i = 0; i < test.length; i++) c.add(test[i].getBytes()); + c.remove("fuenf".getBytes()); + Iterator i = c.elements(); + String s; + while (i.hasNext()) { + s = new String((byte[]) i.next()).trim(); + System.out.print(s + ", "); + if (s.equals("drei")) i.remove(); + } + System.out.println(""); + System.out.println(c.toString()); + c.sort(kelondroNaturalOrder.naturalOrder); + System.out.println(c.toString()); + c.uniq(kelondroNaturalOrder.naturalOrder); + System.out.println(c.toString()); + c.trim(); + System.out.println(c.toString()); + } + +} diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java new file mode 100644 index 000000000..7084616a9 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -0,0 +1,222 @@ +package de.anomic.kelondro; + +// a collectionIndex is an index to collection (kelondroCollection) objects +// such a collection ist defined by the following parameters +// - chunksize +// - chunkcount +// each of such a collection is stored in a byte[] which may or may not have space for more chunks +// than already exists in such an array. To store these arrays, we reserve entries in kelondroArray +// database files. There will be a set of array files for different sizes of the collection arrays. +// the 1st file has space for chunks, the 2nd file for * chunks, +// the 3rd file for ^^3 chunks, and the n-th file for ^^n chunks. +// if the loadfactor is 4, then we have the following capacities: +// file 0: 4 +// file 1: 16 +// file 2: 64 +// file 3: 256 +// file 4: 1024 +// file 5: 4096 +// file 6:16384 +// file 7:65536 +// the maximum number of such files is called the partitions number. +// we don't want that these files grow too big, an kelondroOutOfLimitsException is throws if they +// are oversized. +// the collection arrays may be migration to another size during run-time, which means that not only the +// partitions as mentioned above are maintained, but also a set of "shadow-partitions", that represent old +// partitions and where data is read only and slowly migrated to the default partitions. + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +public class kelondroCollectionIndex { + + private kelondroIndex index; + private File path; + private String filenameStub; + private int loadfactor; + private int chunksize; + private int partitions; + private int maxChunks; + private kelondroArray[] array; + private int[] arrayCapacity; + + private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber) { + String lf = Integer.toHexString(loadfactor).toUpperCase(); + while (lf.length() < 2) lf = "0" + lf; + String cs = Integer.toHexString(chunksize).toUpperCase(); + while (cs.length() < 4) cs = "0" + cs; + String pn = Integer.toHexString(partitionNumber).toUpperCase(); + while (pn.length() < 2) pn = "0" + pn; + return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + ".kca"); // kelondro collection array + } + + private static final long day = 1000 * 60 * 60 * 24; + + private static int daysSince2000(long time) { + return (int) (time / day) - 10957; + } + + public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder, long buffersize, + int loadfactor, int chunksize, int partitions) throws IOException { + this.path = path; + this.filenameStub = filenameStub; + this.chunksize = chunksize; + this.partitions = partitions; + this.loadfactor = loadfactor; + + // create index file(s) + int[] columns; + columns = new int[3]; + columns[0] = keyLength; + columns[1] = 4; // chunksize (number of bytes in a single chunk, needed for migration option) + columns[2] = 4; // chunkcount (number of chunks in this collection) + columns[3] = 4; // index (position in index file) + columns[4] = 2; // update time in days since 1.1.2000 + index = new kelondroSplittedTree(path, filenameStub, indexOrder, buffersize, 8, columns, 1, 80, true); + + // create array files + this.array = new kelondroArray[partitions]; + this.arrayCapacity = new int[partitions]; + + // open array files + int load = 1; + + for (int i = 0; i < partitions; i++) { + load = load * loadfactor; + array[i] = openArrayFile(chunksize, i); + arrayCapacity[i] = load; + } + this.maxChunks = load; + } + + private kelondroArray openArrayFile(int genericChunkSize, int partitionNumber) throws IOException { + File f = arrayFile(path, filenameStub, loadfactor, genericChunkSize, partitionNumber); + + if (f.exists()) { + return new kelondroArray(f); + } else { + int load = 1; for (int i = 0; i < partitionNumber; i++) load = load * loadfactor; + int[] columns = new int[4]; + columns[0] = index.columnSize(0); // add always the key + columns[1] = 4; // chunkcount (raw format) + columns[2] = 2; // last time read + columns[3] = 2; // last time wrote + columns[4] = 2; // flag string, assigns collection order as currently stored in table + columns[5] = load * genericChunkSize; + return new kelondroArray(f, columns, 0, true); + } + } + + private int arrayIndex(int requestedCapacity) throws kelondroOutOfLimitsException{ + // the requestedCapacity is the number of wanted chunks + for (int i = 0; i < arrayCapacity.length; i++) { + if (arrayCapacity[i] >= requestedCapacity) return i; + } + throw new kelondroOutOfLimitsException(maxChunks, requestedCapacity); + } + + public void put(byte[] key, kelondroCollection collection) throws IOException, kelondroOutOfLimitsException { + if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size()); + + // first find an old entry, if one exists + byte[][] oldindexrow = index.get(key); + + // define the new storage array + byte[][] newarrayrow = new byte[][]{key, + kelondroNaturalOrder.encodeLong((long) collection.size(), 4), + collection.getOrderingSignature().getBytes(), + collection.toByteArray()}; + if (oldindexrow == null) { + // the collection is new + // find appropriate partition for the collection: + int part = arrayIndex(collection.size()); + + // write a new entry in this array + int newRowNumber = array[part].add(newarrayrow); + // store the new row number in the index + index.put(new byte[][]{key, + kelondroNaturalOrder.encodeLong(this.chunksize, 4), + kelondroNaturalOrder.encodeLong(collection.size(), 4), + kelondroNaturalOrder.encodeLong((long) newRowNumber, 4), + kelondroNaturalOrder.encodeLong(daysSince2000(System.currentTimeMillis()), 2) + }); + } else { + // overwrite the old collection + // read old information + //int chunksize = (int) kelondroNaturalOrder.decodeLong(oldindexrow[1]); // needed only for migration + int chunkcount = (int) kelondroNaturalOrder.decodeLong(oldindexrow[2]); + int rownumber = (int) kelondroNaturalOrder.decodeLong(oldindexrow[3]); + int oldPartitionNumber = arrayIndex(chunkcount); + int newPartitionNumber = arrayIndex(collection.size()); + + // see if we need new space or if we can overwrite the old space + if (oldPartitionNumber == newPartitionNumber) { + // we don't need a new slot, just write in the old one + array[oldPartitionNumber].set(rownumber, newarrayrow); + // update the index entry + index.put(new byte[][]{key, + kelondroNaturalOrder.encodeLong(this.chunksize, 4), + kelondroNaturalOrder.encodeLong(collection.size(), 4), + kelondroNaturalOrder.encodeLong((long) rownumber, 4), + kelondroNaturalOrder.encodeLong(daysSince2000(System.currentTimeMillis()), 2) + }); + } else { + // we need a new slot, that means we must first delete the old entry + array[oldPartitionNumber].remove(rownumber); + // write a new entry in the other array + int newRowNumber = array[newPartitionNumber].add(newarrayrow); + // store the new row number in the index + index.put(new byte[][]{key, + kelondroNaturalOrder.encodeLong(this.chunksize, 4), + kelondroNaturalOrder.encodeLong(collection.size(), 4), + kelondroNaturalOrder.encodeLong((long) newRowNumber, 4), + kelondroNaturalOrder.encodeLong(daysSince2000(System.currentTimeMillis()), 2) + }); + } + } + } + + public kelondroCollection get(byte[] key) throws IOException { + // find an entry, if one exists + byte[][] indexrow = index.get(key); + if (indexrow == null) return null; + // read values + int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]); + int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]); + int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]); + int partitionnumber = arrayIndex(chunkcount); + // open array entry + byte[][] arrayrow = array[partitionnumber].get(rownumber); + if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array does not contain expected row"); + // read the row and define a collection + int chunkcountInArray = (int) kelondroNaturalOrder.decodeLong(arrayrow[1]); + if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray); + return new kelondroCollection(chunksize, chunkcount, new String(arrayrow[2]), arrayrow[3]); + } + + public void remove(byte[] key) throws IOException { + // find an entry, if one exists + byte[][] indexrow = index.get(key); + if (indexrow == null) return; + // read values + //int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]); + int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]); + int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]); + int partitionnumber = arrayIndex(chunkcount); + // remove array entry + array[partitionnumber].remove(rownumber); + } + + /* + public Iterator collections(boolean up, boolean rotating) throws IOException { + // Objects are of type kelondroCollection + } + */ + + public static void main(String[] args) { + System.out.println(new java.util.Date(10957 * day)); + System.out.println(new java.util.Date(0)); + System.out.println(daysSince2000(System.currentTimeMillis())); + } +} diff --git a/source/de/anomic/kelondro/kelondroOutOfLimitsException.java b/source/de/anomic/kelondro/kelondroOutOfLimitsException.java new file mode 100644 index 000000000..69918d78f --- /dev/null +++ b/source/de/anomic/kelondro/kelondroOutOfLimitsException.java @@ -0,0 +1,56 @@ +// kelondroOutOfLimitsException.java +// --------------------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2006 +// created: 17.01.2006 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +public class kelondroOutOfLimitsException extends java.lang.RuntimeException { + + private static final long serialVersionUID = 1L; + + public kelondroOutOfLimitsException() { + super("unspecific-error"); + } + + public kelondroOutOfLimitsException(int expectedLimit, int actualSize) { + super("Object size is " + actualSize + "; it exceeds the size limit " + expectedLimit); + } + +}