added two new kelondro classes that will handle the new index data structures (not used yet)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1789 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent bec1a8e922
commit c51603a405

@ -0,0 +1,355 @@
// kelondroCollection.java
// -----------------------
// part of The Kelondro Database
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2006
// created: 12.01.2006
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.kelondro;
import java.util.Comparator;
import java.util.Iterator;
/**
 * A growable collection of fixed-width byte chunks that are stored back-to-back
 * in one byte array.
 *
 * Every chunk occupies exactly <code>chunksize</code> bytes. Chunks that are
 * shorter than the chunk width are padded with zero bytes when they are added.
 * The collection can be sorted with a comparator; the signature of the order
 * that was applied last is remembered so that a repeated sort can be skipped.
 */
public class kelondroCollection {

    // dedicated monitor: the chunkcache array is replaced whenever the collection
    // grows or is trimmed, so the array itself is not a stable lock object
    private final Object lock = new Object();

    private byte[] chunkcache; // all chunks, stored consecutively
    private int chunkcount;    // number of chunks currently in use
    private int chunksize;     // width of a single chunk in bytes
    private long lastTimeRead, lastTimeWrote; // not read or written by any code in this class
    private String orderkey;   // signature of the current ordering, null if there is none

    /**
     * Creates an empty collection.
     *
     * @param objectSize width of a single chunk in bytes
     */
    public kelondroCollection(int objectSize) {
        this(objectSize, 0, null, new byte[0]);
    }

    /**
     * Creates a collection on top of an existing chunk array.
     * The given array is used directly, it is not copied.
     *
     * @param objectSize      width of a single chunk in bytes
     * @param objectCount     number of chunks in <code>collectioncache</code> that are in use
     * @param signature       signature of the order the chunks are stored in, or null if unordered
     * @param collectioncache the chunk storage; its length must be a multiple of <code>objectSize</code>
     */
    public kelondroCollection(int objectSize, int objectCount, String signature, byte[] collectioncache) {
        assert (collectioncache.length % objectSize == 0);
        assert (objectCount <= collectioncache.length / objectSize);
        this.chunksize = objectSize;
        this.chunkcache = collectioncache;
        this.chunkcount = objectCount;
        this.orderkey = signature;
    }

    // makes sure that the storage has room for the given number of chunks;
    // the storage grows to exactly the requested size
    private void ensureSize(int elements) {
        int needed = elements * chunksize;
        if (chunkcache.length >= needed) return;
        byte[] newChunkcache = new byte[needed];
        System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length);
        chunkcache = newChunkcache;
    }

    /**
     * Shrinks the storage so that it holds exactly the chunks that are in use.
     */
    public void trim() {
        synchronized (lock) {
            int needed = chunkcount * chunksize;
            if (chunkcache.length == needed) return;
            byte[] newChunkcache = new byte[needed];
            System.arraycopy(chunkcache, 0, newChunkcache, 0, needed);
            chunkcache = newChunkcache;
        }
    }

    /**
     * Appends a chunk. Any known ordering is discarded.
     *
     * @param a the chunk content; must not be longer than the chunk width
     */
    public void add(byte[] a) {
        assert (a.length <= chunksize);
        synchronized (lock) {
            ensureSize(chunkcount + 1);
            int offset = chunksize * chunkcount;
            System.arraycopy(a, 0, chunkcache, offset, a.length);
            // the slot may still hold bytes of a chunk that was removed earlier;
            // clear the part that a short chunk does not overwrite
            if (a.length < chunksize) {
                java.util.Arrays.fill(chunkcache, offset + a.length, offset + chunksize, (byte) 0);
            }
            chunkcount++;
            this.orderkey = null;
        }
    }

    /**
     * Appends all chunks of another collection.
     *
     * @param c the collection to copy from; its chunks must not be wider than the chunks of this collection
     */
    public void addAll(kelondroCollection c) {
        assert (this.chunksize >= c.chunksize);
        // the number of chunks is fixed up-front: if c is this collection, the
        // iterator would otherwise follow the growing chunk count and never end
        int count = c.size();
        synchronized (lock) {
            ensureSize(chunkcount + count);
        }
        Iterator i = c.elements();
        for (int n = 0; (n < count) && i.hasNext(); n++) {
            add((byte[]) i.next());
        }
    }

    /**
     * Removes the first chunk that starts with the given bytes.
     *
     * @param a the bytes to look for; may be shorter than the chunk width
     */
    public void remove(byte[] a) {
        if (chunkcount == 0) return;
        synchronized (lock) {
            int p = find(a);
            remove(p);
        }
    }

    /**
     * Removes the first chunk that the comparator considers equal to the given bytes.
     *
     * @param a the bytes to look for; may be shorter than the chunk width
     * @param c the comparator that decides about equality
     */
    public void remove(byte[] a, Comparator c) {
        if (chunkcount == 0) return;
        synchronized (lock) {
            int p = find(a, c);
            remove(p);
        }
    }

    // removes the chunk at position p by moving all following chunks one slot down;
    // positions outside of the collection are ignored
    private void remove(int p) {
        if (chunkcount == 0) return;
        if ((p < 0) || (p >= chunkcount)) return; // out of bounds, nothing to delete
        System.arraycopy(chunkcache, (p + 1) * chunksize, chunkcache, p * chunksize, (chunkcount - p - 1) * chunksize);
        chunkcount--;
    }

    // returns the number of the first chunk that starts with a, or -1
    private int find(byte[] a) {
        for (int i = 0; i < chunkcount; i++) {
            if (match(a, i)) return i;
        }
        return -1;
    }

    // returns the number of the first chunk that compares equal to a, or -1
    private int find(byte[] a, Comparator c) {
        for (int i = 0; i < chunkcount; i++) {
            if (compare(a, i, c) == 0) return i;
        }
        return -1;
    }

    /**
     * Removes, for every chunk of the given collection, the first matching chunk of this collection.
     *
     * @param c the collection with the chunks to remove
     */
    public void removeAll(kelondroCollection c) {
        Iterator i = c.elements();
        while (i.hasNext()) remove((byte[]) i.next());
    }

    /**
     * Removes all chunks and releases the storage.
     */
    public void clear() {
        this.chunkcount = 0;
        this.chunkcache = new byte[0];
        this.orderkey = null;
    }

    /**
     * @return the number of chunks in this collection
     */
    public int size() {
        return chunkcount;
    }

    /**
     * @return an iterator that delivers a copy of every chunk as <code>byte[]</code>
     */
    public Iterator elements() {
        return new chunkIterator();
    }

    /**
     * Iterates over the chunks in storage order. Every chunk is delivered as a
     * fresh <code>byte[]</code> of chunk width.
     */
    public class chunkIterator implements Iterator {

        int c = 0; // number of the chunk that next() delivers

        public chunkIterator() {
            c = 0;
        }

        public boolean hasNext() {
            return c < chunkcount;
        }

        public Object next() {
            if (c >= chunkcount) throw new java.util.NoSuchElementException();
            byte[] chunk = new byte[chunksize];
            System.arraycopy(chunkcache, c * chunksize, chunk, 0, chunksize);
            c++;
            return chunk;
        }

        /**
         * Removes the chunk that precedes the iterator position.
         *
         * @throws IllegalStateException if no chunk has been delivered yet
         */
        public void remove() {
            if (c <= 0) throw new IllegalStateException("no chunk has been delivered by next() yet");
            c--;
            kelondroCollection.this.remove(c);
        }
    }

    /**
     * @return the signature of the order the chunks are stored in, or null if there is none
     */
    public String getOrderingSignature() {
        return this.orderkey;
    }

    /**
     * Searches a sorted collection for a key.
     *
     * @param key the bytes to look for
     * @param c   the comparator the collection was sorted with
     * @return the chunk number if the key was found, otherwise
     *         <code>-(insertion point) - 1</code>
     */
    public int binarySearch(byte[] key, Comparator c) {
        assert (this.orderkey != null);
        int l = 0;
        int r = chunkcount - 1;
        int p = 0;
        int d;
        while (l <= r) {
            p = (l + r) >>> 1;
            d = compare(key, p, c);
            if (d == 0) return p;
            else if (d < 0) r = p - 1;
            else l = ++p; // p moves along so that it ends up at the insertion point
        }
        return -p - 1;
    }

    /**
     * Sorts the whole collection and remembers the signature of the order.
     * Nothing is done if the collection is already known to be in that order.
     *
     * @param ko the order to sort by
     */
    public void sort(kelondroOrder ko) {
        String signature = ko.signature();
        // signatures are compared by content, not by object identity
        if ((signature != null) && signature.equals(this.orderkey)) return; // this is already sorted
        synchronized (lock) {
            qsort(0, chunkcount - 1, (Comparator) ko);
            this.orderkey = signature;
        }
    }

    /**
     * Sorts a range of chunks. The remembered ordering signature is not changed.
     *
     * @param fromIndex number of the first chunk of the range
     * @param toIndex   number of the last chunk of the range (inclusive)
     * @param c         the comparator to sort by
     */
    public void sort(int fromIndex, int toIndex, Comparator c) {
        assert (fromIndex <= toIndex);
        assert (fromIndex >= 0);
        synchronized (lock) {
            qsort(fromIndex, toIndex, c);
        }
    }

    // exchanges the content of two chunks
    private void swap(int i, int j) {
        if (i == j) return;
        byte[] a = new byte[chunksize];
        System.arraycopy(chunkcache, chunksize * i, a, 0, chunksize);
        System.arraycopy(chunkcache, chunksize * j, chunkcache, chunksize * i, chunksize);
        System.arraycopy(a, 0, chunkcache, chunksize * j, chunksize);
    }

    // insertion sort of the chunks l..r (inclusive); used for short ranges
    private void isort(int l, int r, Comparator c) {
        for (int i = l + 1; i <= r; i++)
            for (int j = i; j > l && compare(j - 1, j, c) > 0; j--)
                swap(j, j - 1);
    }

    // quicksort of the chunks l..r (inclusive)
    private void qsort(int l, int r, Comparator c) {
        if (l >= r) return;
        if (r - l < 10) {
            isort(l, r, c);
            return;
        }
        int i = l;
        int j = r;
        // the pivot is copied because swapping moves the chunk it was taken from
        byte[] a = new byte[chunksize];
        int pivot = (i + j) / 2;
        System.arraycopy(chunkcache, pivot * chunksize, a, 0, chunksize);
        while (i <= j) {
            // only the sign of a comparator result is defined, so test the sign
            while (compare(a, i, c) > 0) i++; // chunk i is smaller than the pivot
            while (compare(a, j, c) < 0) j--; // chunk j is greater than the pivot
            if (i <= j) {
                swap(i, j);
                i++;
                j--;
            }
        }
        qsort(l, j, c);
        qsort(i, r, c);
    }

    /**
     * Removes double-occurrences of chunks.
     * This works only if the collection was ordered with sort before.
     *
     * @param c the comparator that decides about equality
     */
    public void uniq(Comparator c) {
        assert (this.orderkey != null);
        synchronized (lock) {
            if (chunkcount <= 1) return;
            int i = 0;
            while (i < chunkcount - 1) {
                if (compare(i, i + 1, c) == 0) {
                    remove(i); // the successor moves to position i and is checked next
                } else {
                    i++;
                }
            }
        }
    }

    /**
     * @return the chunks as trimmed strings, separated by ", "
     */
    public String toString() {
        StringBuffer s = new StringBuffer();
        Iterator i = elements();
        if (i.hasNext()) s.append(new String((byte[]) i.next()).trim());
        while (i.hasNext()) s.append(", " + new String((byte[]) i.next()).trim());
        return s.toString();
    }

    /**
     * @return the internal chunk storage itself (no copy); it may be longer than
     *         <code>size()</code> chunks unless <code>trim()</code> was called
     */
    public byte[] toByteArray() {
        return this.chunkcache;
    }

    /**
     * Tests whether a chunk starts with the given bytes.
     *
     * @param a           the bytes to compare with
     * @param chunknumber the number of the chunk
     * @return true if the chunk exists, <code>a</code> is not longer than the chunk
     *         width and all bytes of <code>a</code> equal the leading bytes of the chunk
     */
    public boolean match(byte[] a, int chunknumber) {
        if ((chunknumber < 0) || (chunknumber >= chunkcount))
            return false;
        int i = 0;
        int p = chunknumber * chunksize;
        final int len = a.length;
        if (len > chunksize)
            return false;
        while (i < len)
            if (a[i++] != chunkcache[p++])
                return false;
        return true;
    }

    /**
     * Compares the given bytes with a chunk.
     *
     * @param a           the left operand
     * @param chunknumber the number of the chunk that is the right operand
     * @param c           the comparator to use
     * @return the result of <code>c.compare(a, chunk)</code>
     */
    public int compare(byte[] a, int chunknumber, Comparator c) {
        // this can be enhanced
        assert (chunknumber < chunkcount);
        byte[] b = new byte[chunksize];
        System.arraycopy(chunkcache, chunknumber * chunksize, b, 0, chunksize);
        return c.compare(a, b);
    }

    /**
     * Compares two chunks of this collection.
     *
     * @param i the number of the chunk that is the left operand
     * @param j the number of the chunk that is the right operand
     * @param c the comparator to use
     * @return the result of <code>c.compare(chunk i, chunk j)</code>
     */
    public int compare(int i, int j, Comparator c) {
        // this can be enhanced
        assert (i < chunkcount);
        assert (j < chunkcount);
        byte[] a = new byte[chunksize];
        byte[] b = new byte[chunksize];
        System.arraycopy(chunkcache, i * chunksize, a, 0, chunksize);
        System.arraycopy(chunkcache, j * chunksize, b, 0, chunksize);
        return c.compare(a, b);
    }

    // small demonstration: add, remove, iterate, sort, uniq and trim
    public static void main(String[] args) {
        String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" };
        kelondroCollection c = new kelondroCollection(10);
        for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
        for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
        c.remove("fuenf".getBytes());
        Iterator i = c.elements();
        String s;
        while (i.hasNext()) {
            s = new String((byte[]) i.next()).trim();
            System.out.print(s + ", ");
            if (s.equals("drei")) i.remove();
        }
        System.out.println("");
        System.out.println(c.toString());
        c.sort(kelondroNaturalOrder.naturalOrder);
        System.out.println(c.toString());
        c.uniq(kelondroNaturalOrder.naturalOrder);
        System.out.println(c.toString());
        c.trim();
        System.out.println(c.toString());
    }
}

@ -0,0 +1,222 @@
package de.anomic.kelondro;
// a collectionIndex is an index to collection (kelondroCollection) objects
// such a collection is defined by the following parameters
// - chunksize
// - chunkcount
// each of such a collection is stored in a byte[] which may or may not have space for more chunks
// than already exists in such an array. To store these arrays, we reserve entries in kelondroArray
// database files. There will be a set of array files for different sizes of the collection arrays.
// the 1st file has space for <loadfactor> chunks, the 2nd file for <loadfactor> * <loadfactor> chunks,
// the 3rd file for <loadfactor>^^3 chunks, and the n-th file for <loadfactor>^^n chunks.
// if the loadfactor is 4, then we have the following capacities:
// file 0: 4
// file 1: 16
// file 2: 64
// file 3: 256
// file 4: 1024
// file 5: 4096
// file 6:16384
// file 7:65536
// the maximum number of such files is called the partitions number.
// we don't want these files to grow too big; a kelondroOutOfLimitsException is thrown if they
// are oversized.
// the collection arrays may be migrated to another size during run-time, which means that not only the
// partitions as mentioned above are maintained, but also a set of "shadow-partitions" that represent old
// partitions, where data is read-only and is slowly migrated to the default partitions.
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
/**
 * An index from keys to kelondroCollection objects.
 *
 * The index file stores, for every key, the chunk size, the chunk count, the
 * row number of the collection inside an array file and an update date.
 * The collections themselves are stored in a set of array files ("partitions");
 * partition i offers room for loadfactor^(i+1) chunks per row.
 */
public class kelondroCollectionIndex {

    private kelondroIndex index;     // key -> (chunksize, chunkcount, row number, update day)
    private File path;               // directory of the index and array files
    private String filenameStub;     // common prefix of all file names
    private int loadfactor;          // growth factor of the capacity from one partition to the next
    private int chunksize;           // number of bytes of a single chunk
    private int partitions;          // number of array files
    private int maxChunks;           // capacity of the largest partition
    private kelondroArray[] array;   // one array file per partition
    private int[] arrayCapacity;     // number of chunks that fit into a row of each partition

    // builds the name of an array file: <stub>.<loadfactor>.<chunksize>.<partition>.kca,
    // all numbers in upper-case hex, zero-padded to 2, 4 and 2 digits
    private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber) {
        String lf = Integer.toHexString(loadfactor).toUpperCase();
        while (lf.length() < 2) lf = "0" + lf;
        String cs = Integer.toHexString(chunksize).toUpperCase();
        while (cs.length() < 4) cs = "0" + cs;
        String pn = Integer.toHexString(partitionNumber).toUpperCase();
        while (pn.length() < 2) pn = "0" + pn;
        return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + ".kca"); // kelondro collection array
    }

    private static final long day = 1000 * 60 * 60 * 24; // milliseconds per day

    // converts a time in milliseconds since 1.1.1970 into days since 1.1.2000;
    // 10957 is the number of days between these two dates
    private static int daysSince2000(long time) {
        return (int) (time / day) - 10957;
    }

    /**
     * Opens the index file and all array files, creating those that do not exist.
     *
     * @param path         directory of the files
     * @param filenameStub common prefix of all file names
     * @param keyLength    number of bytes of a key
     * @param indexOrder   order of the keys in the index
     * @param buffersize   buffer size that is handed to the index
     * @param loadfactor   growth factor of the capacity from one partition to the next
     * @param chunksize    number of bytes of a single chunk
     * @param partitions   number of array files
     * @throws IOException if a file cannot be opened or created
     */
    public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder, long buffersize,
                                   int loadfactor, int chunksize, int partitions) throws IOException {
        this.path = path;
        this.filenameStub = filenameStub;
        this.chunksize = chunksize;
        this.partitions = partitions;
        this.loadfactor = loadfactor;

        // create index file(s); the row has five columns
        int[] columns = new int[5];
        columns[0] = keyLength;
        columns[1] = 4; // chunksize (number of bytes in a single chunk, needed for migration option)
        columns[2] = 4; // chunkcount (number of chunks in this collection)
        columns[3] = 4; // index (position in index file)
        columns[4] = 2; // update time in days since 1.1.2000
        index = new kelondroSplittedTree(path, filenameStub, indexOrder, buffersize, 8, columns, 1, 80, true);

        // create array files
        this.array = new kelondroArray[partitions];
        this.arrayCapacity = new int[partitions];

        // open array files; the capacity is multiplied with the loadfactor for every partition
        int load = 1;
        for (int i = 0; i < partitions; i++) {
            load = load * loadfactor;
            array[i] = openArrayFile(chunksize, i);
            arrayCapacity[i] = load;
        }
        this.maxChunks = load;
    }

    // opens the array file of the given partition or creates it if it does not exist
    private kelondroArray openArrayFile(int genericChunkSize, int partitionNumber) throws IOException {
        File f = arrayFile(path, filenameStub, loadfactor, genericChunkSize, partitionNumber);
        if (f.exists()) {
            return new kelondroArray(f);
        }
        int load = 1;
        for (int i = 0; i < partitionNumber; i++) load = load * loadfactor;
        // NOTE(review): load is loadfactor^partitionNumber here while the constructor records a
        // capacity of loadfactor^(partitionNumber + 1) for the same partition - confirm which is intended
        int[] columns = new int[6]; // the row has six columns
        columns[0] = index.columnSize(0); // add always the key
        columns[1] = 4; // chunkcount (raw format)
        columns[2] = 2; // last time read
        columns[3] = 2; // last time wrote
        columns[4] = 2; // flag string, assigns collection order as currently stored in table
        columns[5] = load * genericChunkSize;
        return new kelondroArray(f, columns, 0, true);
    }

    // returns the number of the smallest partition that has room for the wanted number of chunks
    private int arrayIndex(int requestedCapacity) throws kelondroOutOfLimitsException {
        for (int i = 0; i < arrayCapacity.length; i++) {
            if (arrayCapacity[i] >= requestedCapacity) return i;
        }
        throw new kelondroOutOfLimitsException(maxChunks, requestedCapacity);
    }

    // builds an index row for a collection that is stored in the given array row;
    // the update day is set to the current date
    private byte[][] indexRow(byte[] key, int chunkcount, int rownumber) {
        return new byte[][]{key,
            kelondroNaturalOrder.encodeLong(this.chunksize, 4),
            kelondroNaturalOrder.encodeLong(chunkcount, 4),
            kelondroNaturalOrder.encodeLong((long) rownumber, 4),
            kelondroNaturalOrder.encodeLong(daysSince2000(System.currentTimeMillis()), 2)
        };
    }

    /**
     * Stores a collection under a key. An existing collection with the same key is replaced.
     *
     * @param key        the key of the collection
     * @param collection the collection to store
     * @throws IOException                  if the index or an array file cannot be accessed
     * @throws kelondroOutOfLimitsException if the collection has more chunks than the largest partition can hold
     */
    public void put(byte[] key, kelondroCollection collection) throws IOException, kelondroOutOfLimitsException {
        if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());

        // first find an old entry, if one exists
        byte[][] oldindexrow = index.get(key);

        // a collection that was never sorted has no ordering signature (null)
        String signature = collection.getOrderingSignature();
        byte[] signatureBytes = (signature == null) ? new byte[0] : signature.getBytes();

        // define the new storage array
        // NOTE(review): this row has four entries while openArrayFile declares six columns
        // (key, chunkcount, last read, last wrote, flag, data) - confirm the intended layout
        byte[][] newarrayrow = new byte[][]{key,
            kelondroNaturalOrder.encodeLong((long) collection.size(), 4),
            signatureBytes,
            collection.toByteArray()};

        if (oldindexrow == null) {
            // the collection is new: find the appropriate partition and write a new entry there
            int part = arrayIndex(collection.size());
            int newRowNumber = array[part].add(newarrayrow);
            // store the new row number in the index
            index.put(indexRow(key, collection.size(), newRowNumber));
            return;
        }

        // overwrite the old collection; read old information
        int chunkcount = (int) kelondroNaturalOrder.decodeLong(oldindexrow[2]);
        int rownumber = (int) kelondroNaturalOrder.decodeLong(oldindexrow[3]);
        int oldPartitionNumber = arrayIndex(chunkcount);
        int newPartitionNumber = arrayIndex(collection.size());

        // see if we need new space or if we can overwrite the old space
        if (oldPartitionNumber == newPartitionNumber) {
            // we don't need a new slot, just write in the old one and update the index entry
            array[oldPartitionNumber].set(rownumber, newarrayrow);
            index.put(indexRow(key, collection.size(), rownumber));
        } else {
            // we need a new slot, that means we must first delete the old entry
            array[oldPartitionNumber].remove(rownumber);
            // write a new entry in the other array and store the new row number in the index
            int newRowNumber = array[newPartitionNumber].add(newarrayrow);
            index.put(indexRow(key, collection.size(), newRowNumber));
        }
    }

    /**
     * Reads the collection that is stored under a key.
     *
     * @param key the key of the collection
     * @return the collection, or null if the index has no entry for the key
     * @throws IOException if the index or an array file cannot be accessed
     */
    public kelondroCollection get(byte[] key) throws IOException {
        // find an entry, if one exists
        byte[][] indexrow = index.get(key);
        if (indexrow == null) return null;

        // read values
        int storedChunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]);
        int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]);
        int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]);
        int partitionnumber = arrayIndex(chunkcount);

        // open array entry
        byte[][] arrayrow = array[partitionnumber].get(rownumber);
        if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, storedChunksize, partitionnumber).toString(), "array does not contain expected row");

        // read the row and define a collection; index and array must agree about the chunk count
        int chunkcountInArray = (int) kelondroNaturalOrder.decodeLong(arrayrow[1]);
        if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, storedChunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray);
        // entries 2 and 3 are read as signature and chunk data, matching the row that put() writes
        return new kelondroCollection(storedChunksize, chunkcount, new String(arrayrow[2]), arrayrow[3]);
    }

    /**
     * Removes the array entry of the collection that is stored under a key.
     * Nothing happens if the index has no entry for the key.
     *
     * @param key the key of the collection
     * @throws IOException if the index or an array file cannot be accessed
     */
    public void remove(byte[] key) throws IOException {
        // find an entry, if one exists
        byte[][] indexrow = index.get(key);
        if (indexrow == null) return;

        // read values
        int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]);
        int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]);
        int partitionnumber = arrayIndex(chunkcount);

        // remove array entry
        // NOTE(review): the index entry for the key is left in place and still points to the
        // removed array row - presumably it has to be removed from the index as well; confirm
        array[partitionnumber].remove(rownumber);
    }

    /*
    public Iterator collections(boolean up, boolean rotating) throws IOException {
    // Objects are of type kelondroCollection
    }
    */

    // prints the reference date of daysSince2000, the epoch and the current day number
    public static void main(String[] args) {
        System.out.println(new java.util.Date(10957 * day));
        System.out.println(new java.util.Date(0));
        System.out.println(daysSince2000(System.currentTimeMillis()));
    }
}

@ -0,0 +1,56 @@
// kelondroOutOfLimitsException.java
// ---------------------------------
// part of The Kelondro Database
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2006
// created: 17.01.2006
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.kelondro;
/**
 * Signals that an object is larger than the limit that applies to it.
 */
public class kelondroOutOfLimitsException extends java.lang.RuntimeException {

    private static final long serialVersionUID = 1L;

    // message of an exception that carries no size information
    private static final String UNSPECIFIC_MESSAGE = "unspecific-error";

    /**
     * Creates an exception without any size information.
     */
    public kelondroOutOfLimitsException() {
        super(UNSPECIFIC_MESSAGE);
    }

    /**
     * Creates an exception that reports the violated limit.
     *
     * @param expectedLimit the largest size that is allowed
     * @param actualSize    the size that was found
     */
    public kelondroOutOfLimitsException(int expectedLimit, int actualSize) {
        super(describe(expectedLimit, actualSize));
    }

    // assembles the message text for a violated size limit
    private static String describe(int limit, int size) {
        StringBuffer text = new StringBuffer("Object size is ");
        text.append(size);
        text.append("; it exceeds the size limit ");
        text.append(limit);
        return text.toString();
    }
}
Loading…
Cancel
Save