Rewrite of kelondroCollectionIndex. This is the data structure that shall replace the current assortment files.
* used the kelondroFlexTable to hold the index of collections
* used kelondroRow definitions to declare all data structures
* fixed several bugs that appeared in kelondroRowSet and kelondroRowCollection during testing


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2344 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent ebc2233092
commit 01f95eccd3

@ -187,7 +187,7 @@ public class dbtest {
} }
if (dbe.equals("kelondroFlexTable")) { if (dbe.equals("kelondroFlexTable")) {
File tablepath = new File(tablename).getParentFile(); File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, preload, testRow, true); table = new kelondroFlexTable(tablepath, new File(tablename).getName(), kelondroBase64Order.enhancedCoder, buffer, preload, testRow, true);
} }
if (dbe.equals("mysql")) { if (dbe.equals("mysql")) {
table = new dbTable("mysql", testRow); table = new dbTable("mysql", testRow);
@ -342,7 +342,7 @@ public class dbtest {
if (table instanceof kelondroTree) ((kelondroTree) table).close(); if (table instanceof kelondroTree) ((kelondroTree) table).close();
if (table instanceof kelondroFlexTable) ((kelondroFlexTable) table).close(); if (table instanceof kelondroFlexTable) ((kelondroFlexTable) table).close();
if (table instanceof kelondroSplittedTree) ((kelondroSplittedTree) table).close(); if (table instanceof kelondroSplittedTree) ((kelondroSplittedTree) table).close();
if (table instanceof dbTable) ((dbTable)table).closeDatabaseConnection(); if (table instanceof dbTable) ((dbTable)table).close();
long afterclose = System.currentTimeMillis(); long afterclose = System.currentTimeMillis();
@ -408,11 +408,11 @@ final class dbTable implements kelondroIndex {
} }
public void closeDatabaseConnection() throws Exception { public void close() throws IOException {
try { try {
this.theDBConnection.close(); this.theDBConnection.close();
} catch (Exception e) { } catch (Exception e) {
throw new Exception ("Unable to close the database connection."); throw new IOException("Unable to close the database connection.");
} }
} }

@ -45,7 +45,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
collectionIndex = new kelondroCollectionIndex( collectionIndex = new kelondroCollectionIndex(
path, filenameStub, 9 /*keyLength*/, path, filenameStub, 9 /*keyLength*/,
kelondroNaturalOrder.naturalOrder, buffersize, preloadTime, kelondroNaturalOrder.naturalOrder, buffersize, preloadTime,
4 /*loadfactor*/, rowdef, 8 /*partitions*/); 4 /*loadfactor*/, rowdef);
} }
public int size() { public int size() {

@ -147,7 +147,7 @@ public class indexURLEntry implements Cloneable, indexEntry {
} }
public String toPropertyForm() { public String toPropertyForm() {
return entry.toPropertyForm(true); return entry.toPropertyForm(true, false);
} }
public Entry toKelondroEntry() { public Entry toKelondroEntry() {

@ -1,6 +1,6 @@
package de.anomic.kelondro; package de.anomic.kelondro;
// a collectionIndex is an index to collection (kelondroCollection) objects // a collectionIndex is an index to kelondroRowCollection objects
// such a collection is defined by the following parameters // such a collection is defined by the following parameters
// - chunksize // - chunksize
// - chunkcount // - chunkcount
@ -27,18 +27,35 @@ package de.anomic.kelondro;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class kelondroCollectionIndex { public class kelondroCollectionIndex {
private kelondroIndex index; private kelondroIndex index;
private File path; private File path;
private String filenameStub; private String filenameStub;
private int loadfactor; private int loadfactor;
//private int partitions; private Map arrays; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects
private int maxChunks; private kelondroRow rowdef; // definition of the payload (chunks inside the collections)
private kelondroFixedWidthArray[] array; // private int partitions; // this is the maxmimum number of array files; yet not used
private int[] arrayCapacity;
private kelondroRow rowdef; private static final int idx_col_key = 0; // the index
private static final int idx_col_chunksize = 1; // chunksize (number of bytes in a single chunk, needed for migration option)
private static final int idx_col_chunkcount = 2; // chunkcount (number of chunks in this collection) needed to identify array file that has the chunks
private static final int idx_col_indexpos = 3; // indexpos (position in index file)
private static final int idx_col_update = 4; // a time stamp, update time in days since 1.1.2000
// builds the row definition of the index table for keys of the given length;
// the column order corresponds to the idx_col_* constants:
// key, chunksize, chunkcount, indexpos, update
private static kelondroRow indexRow(int keylen) {
    StringBuffer definition = new StringBuffer();
    definition.append("byte[] key-").append(keylen).append(",");
    definition.append("int chunksize-4 {b256},");
    definition.append("int chunkcount-4 {b256},");
    definition.append("int indexpos-4 {b256},");
    definition.append("short update-2 {b256}");
    return new kelondroRow(definition.toString());
}
private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber) { private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber) {
@ -51,175 +68,282 @@ public class kelondroCollectionIndex {
return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + ".kca"); // kelondro collection array return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + ".kca"); // kelondro collection array
} }
private static final long day = 1000 * 60 * 60 * 24; // number of milliseconds in one day
// converts a time stamp in milliseconds (callers pass System.currentTimeMillis())
// into the number of whole days that have passed since 1.1.2000
private static int daysSince2000(long time) {
    int daysSince1970 = (int) (time / day);
    // 10957 is the number of days between 1.1.1970 and 1.1.2000
    return daysSince1970 - 10957;
}
public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder, public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder,
long buffersize, long preloadTime, long buffersize, long preloadTime,
int loadfactor, kelondroRow rowdef, int partitions) throws IOException { int loadfactor, kelondroRow rowdef) throws IOException {
// the buffersize is number of bytes that are only used if the kelondroFlexTable is backed up with a kelondroTree
this.path = path; this.path = path;
this.filenameStub = filenameStub; this.filenameStub = filenameStub;
this.rowdef = rowdef; this.rowdef = rowdef;
//this.partitions = partitions;
this.loadfactor = loadfactor; this.loadfactor = loadfactor;
// create index file(s) // create index table
int[] columns; index = new kelondroFlexTable(path, filenameStub + ".index", indexOrder, buffersize, preloadTime, indexRow(keyLength), true);
columns = new int[3];
columns[0] = keyLength;
columns[1] = 4; // chunksize (number of bytes in a single chunk, needed for migration option)
columns[2] = 4; // chunkcount (number of chunks in this collection)
columns[3] = 4; // index (position in index file)
columns[4] = 2; // update time in days since 1.1.2000
index = new kelondroSplittedTree(path, filenameStub, indexOrder, buffersize, preloadTime, 8, new kelondroRow(columns), 1, 80, true);
// create array files
this.array = new kelondroFixedWidthArray[partitions];
this.arrayCapacity = new int[partitions];
// open array files // open array files
int load = 1; this.arrays = new HashMap(); // all entries will be dynamically created with getArray()
for (int i = 0; i < partitions; i++) {
load = load * loadfactor;
array[i] = openArrayFile(i);
arrayCapacity[i] = load;
}
this.maxChunks = load;
} }
private kelondroFixedWidthArray openArrayFile(int partitionNumber) throws IOException { private kelondroFixedWidthArray openArrayFile(int partitionNumber, boolean create) throws IOException {
File f = arrayFile(path, filenameStub, loadfactor, rowdef.objectsize(), partitionNumber); File f = arrayFile(path, filenameStub, loadfactor, rowdef.objectsize(), partitionNumber);
if (f.exists()) { if (f.exists()) {
return new kelondroFixedWidthArray(f); return new kelondroFixedWidthArray(f);
} else if (create) {
int load = arrayCapacity(partitionNumber);
kelondroRow row = new kelondroRow(
"byte[] key-" + index.row().width(0) + "," +
"byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.rowdef.objectsize())
);
return new kelondroFixedWidthArray(f, row, 0, true);
} else { } else {
int load = 1; for (int i = 0; i < partitionNumber; i++) load = load * loadfactor; return null;
int[] columns = new int[4];
columns[0] = index.row().width(0); // add always the key
columns[1] = 4; // chunkcount (raw format)
columns[2] = 2; // last time read
columns[3] = 2; // last time wrote
columns[4] = 2; // flag string, assigns collection order as currently stored in table
columns[5] = load * rowdef.objectsize();
return new kelondroFixedWidthArray(f, new kelondroRow(columns), 0, true);
} }
} }
// returns the array file that holds the collections of the given partition.
// Array files are opened (and created if they do not exist yet) on first access
// and are then cached in the 'arrays' map under the key partitionNumber + "-" + chunksize.
// NOTE(review): chunksize is only used for the cache key; openArrayFile always takes
// the chunk size from this.rowdef.objectsize() -- confirm this is intended for migration.
// throws IOException if the array file cannot be opened or created. The exception is
// passed on to the caller instead of returning null, because no caller checks the
// result for null and would fail with a NullPointerException otherwise.
private kelondroFixedWidthArray getArray(int partitionNumber, int chunksize) throws IOException {
    String accessKey = partitionNumber + "-" + chunksize;
    kelondroFixedWidthArray array = (kelondroFixedWidthArray) arrays.get(accessKey);
    if (array == null) {
        // not opened yet: open or create the file and remember it for later calls
        array = openArrayFile(partitionNumber, true);
        arrays.put(accessKey, array);
    }
    return array;
}
// computes the number of chunks that fit into one collection slot of the array file
// with the given number: the loadfactor multiplied with itself arrayCounter times
private int arrayCapacity(int arrayCounter) {
    int capacity = this.loadfactor;
    int remaining = arrayCounter;
    while (remaining > 0) {
        capacity *= this.loadfactor;
        remaining--;
    }
    return capacity;
}
private int arrayIndex(int requestedCapacity) throws kelondroOutOfLimitsException{ private int arrayIndex(int requestedCapacity) throws kelondroOutOfLimitsException{
// the requestedCapacity is the number of wanted chunks // the requestedCapacity is the number of wanted chunks
for (int i = 0; i < arrayCapacity.length; i++) { int load = 1, i = 0;
if (arrayCapacity[i] >= requestedCapacity) return i; while (true) {
load = load * this.loadfactor;
if (load >= requestedCapacity) return i;
i++;
} }
throw new kelondroOutOfLimitsException(maxChunks, requestedCapacity);
} }
public int size() throws IOException { public int size() throws IOException {
return index.size(); return index.size();
} }
public void put(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException { public void put(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException {
if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size()); // this replaces an old collection by a new one
// this method is not appropriate to extend an existing collection with another collection
insert(key, collection, false);
}
public void join(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException {
insert(key, collection, true);
}
private void insert(byte[] key, kelondroRowCollection collection, boolean join) throws IOException, kelondroOutOfLimitsException {
//if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());
if (collection.size() == 0) {
// this is not a replacement, it is a deletion
remove(key);
return;
}
// first find an old entry, if one exists // first find an old entry, if one exists
kelondroRow.Entry oldindexrow = index.get(key); kelondroRow.Entry oldindexrow = index.get(key);
// define the new storage array
byte[][] newarrayrow = new byte[][]{key,
kelondroNaturalOrder.encodeLong((long) collection.size(), 4),
null /*collection.getOrderingSignature().getBytes()*/,
collection.toByteArray()};
if (oldindexrow == null) { if (oldindexrow == null) {
// the collection is new // the collection is new
// find appropriate partition for the collection: overwrite(key, collection);
int part = arrayIndex(collection.size());
// write a new entry in this array
int newRowNumber = array[part].add(array[part].row().newEntry(newarrayrow));
// store the new row number in the index
kelondroRow.Entry e = index.row().newEntry();
e.setCol(0, key);
e.setColLongB256(1, this.rowdef.objectsize());
e.setColLongB256(2, collection.size());
e.setColLongB256(3, (long) newRowNumber);
e.setColLongB256(4, daysSince2000(System.currentTimeMillis()));
index.put(e);
} else { } else {
// overwrite the old collection // overwrite the old collection
// read old information // read old information
//int chunksize = (int) kelondroNaturalOrder.decodeLong(oldindexrow[1]); // needed only for migration int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration
int chunkcount = (int) oldindexrow.getColLongB256(2); int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount);
int rownumber = (int) oldindexrow.getColLongB256(3); int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(chunkcount); int oldPartitionNumber = arrayIndex(oldchunkcount);
if (join) {
// load the old collection and join it with the new one
// open array entry
kelondroFixedWidthArray oldarray = getArray(oldPartitionNumber, oldchunksize);
//System.out.println("joining for key " + new String(key) + ", oldrow=" + oldrownumber + ", oldchunkcount=" + oldchunkcount + ", array file=" + oldarray.filename);
kelondroRow.Entry oldarrayrow = oldarray.get(oldrownumber);
if (oldarrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, oldchunksize, oldPartitionNumber).toString(), "array does not contain expected row");
// read the row and define a collection
kelondroRowSet oldcollection = new kelondroRowSet(this.rowdef, oldarrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
// join with new collection
oldcollection.addAll(collection);
collection = oldcollection;
}
int newPartitionNumber = arrayIndex(collection.size()); int newPartitionNumber = arrayIndex(collection.size());
// see if we need new space or if we can overwrite the old space // see if we need new space or if we can overwrite the old space
if (oldPartitionNumber == newPartitionNumber) { if (oldPartitionNumber == newPartitionNumber) {
// we don't need a new slot, just write in the old one // we don't need a new slot, just write into the old one
array[oldPartitionNumber].set(rownumber, array[oldPartitionNumber].row().newEntry(newarrayrow));
// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, this.rowdef.objectsize());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
arrayEntry.setCol(0, key);
arrayEntry.setCol(1, collection.exportCollection());
// overwrite entry in this array
array.set(oldrownumber, arrayEntry);
// update the index entry // update the index entry
kelondroRow.Entry e = index.row().newEntry(); oldindexrow.setColLongB256(idx_col_chunkcount, collection.size());
e.setCol(0, key); oldindexrow.setColLongB256(idx_col_update, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
e.setColLongB256(1, this.rowdef.objectsize()); index.put(oldindexrow);
e.setColLongB256(2, collection.size());
e.setColLongB256(3, (long) rownumber);
e.setColLongB256(4, daysSince2000(System.currentTimeMillis()));
index.put(e);
} else { } else {
// we need a new slot, that means we must first delete the old entry // we need a new slot, that means we must first delete the old entry
array[oldPartitionNumber].remove(rownumber); // find array file
kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldchunksize);
// delete old entry
array.remove(oldrownumber);
// write a new entry in the other array // write a new entry in the other array
int newRowNumber = array[newPartitionNumber].add(array[newPartitionNumber].row().newEntry(newarrayrow)); overwrite(key, collection);
// store the new row number in the index
kelondroRow.Entry e = index.row().newEntry();
e.setCol(0, key);
e.setColLongB256(1, this.rowdef.objectsize());
e.setColLongB256(2, collection.size());
e.setColLongB256(3, (long) newRowNumber);
e.setColLongB256(4, daysSince2000(System.currentTimeMillis()));
index.put(e);
} }
} }
} }
private void overwrite(byte[] key, kelondroRowCollection collection) throws IOException {
    // internal helper, not meant to be called directly:
    // appends the collection to the matching array file and writes a fresh index entry
    // for the key, without looking whether an entry for the key already exists

    // select the array file whose slots are large enough for this collection
    int partition = arrayIndex(collection.size());
    kelondroFixedWidthArray target = getArray(partition, this.rowdef.objectsize());

    // build the array row: the key followed by the exported collection
    kelondroRow.Entry payload = target.row().newEntry();
    payload.setCol(0, key);
    payload.setCol(1, collection.exportCollection());

    // append the row; the returned row number is where the collection can be found again
    int position = target.add(payload);

    // build the index row that points to the new array row
    kelondroRow.Entry reference = index.row().newEntry();
    reference.setCol(idx_col_key, key);
    reference.setColLongB256(idx_col_chunksize, this.rowdef.objectsize());
    reference.setColLongB256(idx_col_chunkcount, collection.size());
    reference.setColLongB256(idx_col_indexpos, (long) position);
    reference.setColLongB256(idx_col_update, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
    index.put(reference);
}
public kelondroRowCollection get(byte[] key) throws IOException { public kelondroRowSet get(byte[] key) throws IOException {
// find an entry, if one exists // find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key); kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return null; if (indexrow == null) return null;
// read values // read values
int chunksize = (int) indexrow.getColLongB256(1); int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize);
int chunkcount = (int) indexrow.getColLongB256(2); int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount);
int rownumber = (int) indexrow.getColLongB256(3); int rownumber = (int) indexrow.getColLongB256(idx_col_indexpos);
int partitionnumber = arrayIndex(chunkcount); int partitionnumber = arrayIndex(chunkcount);
// open array entry // open array entry
kelondroRow.Entry arrayrow = array[partitionnumber].get(rownumber); kelondroFixedWidthArray array = getArray(partitionnumber, chunksize);
kelondroRow.Entry arrayrow = array.get(rownumber);
if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array does not contain expected row"); if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array does not contain expected row");
// read the row and define a collection // read the row and define a collection
int chunkcountInArray = (int) arrayrow.getColLongB256(1); kelondroRowSet collection = new kelondroRowSet(this.rowdef, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
int chunkcountInArray = collection.size();
if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray); if (chunkcountInArray != chunkcount) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber).toString(), "array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray);
return new kelondroRowCollection(rowdef, chunkcount, arrayrow.getColBytes(3)); return collection;
} }
public void remove(byte[] key) throws IOException { public int remove(byte[] key) throws IOException {
// returns the number of chunks that have been deleted with the removed collection
// find an entry, if one exists // find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key); kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return; if (indexrow == null) return 0;
// read values // read values
//int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]); int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize);
int chunkcount = (int) indexrow.getColLongB256(2); int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount);
int rownumber = (int) indexrow.getColLongB256(3); int rownumber = (int) indexrow.getColLongB256(idx_col_indexpos);
int partitionnumber = arrayIndex(chunkcount); int partitionnumber = arrayIndex(chunkcount);
// open array entry
kelondroFixedWidthArray array = getArray(partitionnumber, chunksize);
// remove array entry // remove array entry
array[partitionnumber].remove(rownumber); array.remove(rownumber);
return chunkcount;
} }
// closes the index table and all array files that have been opened so far.
// The array files are closed inside a finally block, so they are released
// even if closing the index table fails with an exception.
public void close() throws IOException {
    try {
        this.index.close();
    } finally {
        Iterator i = arrays.values().iterator();
        while (i.hasNext()) {
            ((kelondroFixedWidthArray) i.next()).close();
        }
    }
}
public static void main(String[] args) { public static void main(String[] args) {
System.out.println(new java.util.Date(10957 * day));
System.out.println(new java.util.Date(0)); // define payload structure
System.out.println(daysSince2000(System.currentTimeMillis())); kelondroRow rowdef = new kelondroRow("byte[] eins-10, byte[] zwei-80");
File path = new File(args[0]);
String filenameStub = args[1];
long buffersize = 10000000;
long preloadTime = 10000;
try {
// initialize collection index
kelondroCollectionIndex collectionIndex = new kelondroCollectionIndex(
path, filenameStub, 9 /*keyLength*/,
kelondroNaturalOrder.naturalOrder, buffersize, preloadTime,
4 /*loadfactor*/, rowdef);
// fill index with values
kelondroRowSet collection = new kelondroRowSet(rowdef);
collection.add(rowdef.newEntry(new byte[][]{"abc".getBytes(), "efg".getBytes()}));
collectionIndex.put("erstes".getBytes(), collection);
for (int i = 0; i <= 17; i++) {
collection = new kelondroRowSet(rowdef);
for (int j = 0; j < i; j++) {
collection.add(rowdef.newEntry(new byte[][]{("abc" + j).getBytes(), "xxx".getBytes()}));
}
collectionIndex.put(("key-" + i).getBytes(), collection);
}
// extend collections with more values
for (int i = 0; i <= 17; i++) {
collection = new kelondroRowSet(rowdef);
for (int j = 0; j < i; j++) {
collection.add(rowdef.newEntry(new byte[][]{("def" + j).getBytes(), "xxx".getBytes()}));
}
collectionIndex.join(("key-" + i).getBytes(), collection);
}
collectionIndex.close();
// printout of index
kelondroFlexTable index = new kelondroFlexTable(path, filenameStub + ".index", kelondroNaturalOrder.naturalOrder, buffersize, preloadTime, indexRow(9), true);
index.print();
index.close();
} catch (IOException e) {
e.printStackTrace();
}
} }
} }

@ -56,6 +56,7 @@ public class kelondroColumn {
// example: <UDate-3> // example: <UDate-3>
// cut quotes etc. // cut quotes etc.
celldef = celldef.trim();
if (celldef.startsWith("<")) celldef = celldef.substring(1); if (celldef.startsWith("<")) celldef = celldef.substring(1);
if (celldef.endsWith(">")) celldef = celldef.substring(0, celldef.length() - 1); if (celldef.endsWith(">")) celldef = celldef.substring(0, celldef.length() - 1);
@ -200,4 +201,34 @@ public class kelondroColumn {
return this.description; return this.description;
} }
// renders this column as "<type> <nickname>-<cellwidth>", followed by
// " {b64e}" or " {b256}" if the column uses one of these encoders.
// Exactly one blank separates the encoder from the width and nothing is appended
// for other encoder values, so the result has no double or trailing blanks.
public String toString() {
    StringBuffer s = new StringBuffer();
    // type prefix; an unlisted celltype gets no prefix
    switch (celltype) {
        case celltype_boolean:
            s.append("boolean ");
            break;
        case celltype_binary:
            s.append("byte[] ");
            break;
        case celltype_string:
            s.append("String ");
            break;
        case celltype_cardinal:
            s.append("Cardinal ");
            break;
    }
    s.append(nickname);
    s.append('-');
    s.append(cellwidth);
    // the encoder literals already start with the separating blank
    switch (encoder) {
        case encoder_b64e:
            s.append(" {b64e}");
            break;
        case encoder_b256:
            s.append(" {b256}");
            break;
    }
    return s.toString();
}
} }

@ -92,7 +92,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro
} }
public synchronized kelondroRow.Entry get(int index) throws IOException { public synchronized kelondroRow.Entry get(int index) throws IOException {
if (index >= size()) throw new kelondroException(filename, "out of bounds, index=" + index + ", size=" + size()); //if (index >= size()) throw new kelondroException(filename, "out of bounds, index=" + index + ", size=" + size());
return row().newEntry(getNode(new Handle(index)).getValueRow()); return row().newEntry(getNode(new Handle(index)).getValueRow());
} }

@ -33,23 +33,25 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroBytesIntMap index; private kelondroBytesIntMap index;
public kelondroFlexTable(File path, String tablename, long buffersize, long preloadTime, kelondroRow rowdef, boolean exitOnFail) throws IOException { public kelondroFlexTable(File path, String tablename, kelondroOrder objectOrder, long buffersize, long preloadTime, kelondroRow rowdef, boolean exitOnFail) throws IOException {
super(path, tablename, rowdef, exitOnFail); super(path, tablename, rowdef, exitOnFail);
File newpath = new File(path, tablename + ".table"); File newpath = new File(path, tablename + ".table");
File indexfile = new File(newpath, "col.000.index"); File indexfile = new File(newpath, "col.000.index");
kelondroIndex ki = null; kelondroIndex ki = null;
String description = new String(this.col[0].getDescription()); String description = new String(this.col[0].getDescription());
System.out.println("*** Last Startup time: " + description.substring(4)); int p = description.indexOf(';', 4);
long stt = (p > 0) ? Long.parseLong(description.substring(4, p)) : 0;
System.out.println("*** Last Startup time: " + stt + " milliseconds");
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
if (indexfile.exists()) { if (indexfile.exists()) {
// use existing index file // use existing index file
System.out.println("*** Using File index " + indexfile); System.out.println("*** Using File index " + indexfile);
ki = new kelondroTree(indexfile, buffersize, preloadTime, 10); ki = new kelondroTree(indexfile, buffersize, preloadTime, 10);
} else if (size() > 100000) { } else if (stt > preloadTime) {
// generate new index file // generate new index file
System.out.print("*** Generating File index for " + size() + " entries from " + indexfile); System.out.print("*** Generating File index for " + size() + " entries from " + indexfile);
ki = initializeTreeIndex(indexfile, buffersize, preloadTime); ki = initializeTreeIndex(indexfile, buffersize, preloadTime, objectOrder);
System.out.println(" -done-"); System.out.println(" -done-");
System.out.println(ki.size() System.out.println(ki.size()
@ -58,7 +60,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
} else { } else {
// fill the index // fill the index
System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath); System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath);
ki = initializeRamIndex(); ki = initializeRamIndex(objectOrder);
System.out.println(" -done-"); System.out.println(" -done-");
System.out.println(ki.size() System.out.println(ki.size()
@ -71,9 +73,9 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
super.col[0].setDescription(description.getBytes()); super.col[0].setDescription(description.getBytes());
} }
private kelondroIndex initializeRamIndex() throws IOException { private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException {
kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0); kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0);
ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0); ri.setOrdering(objectOrder, 0);
Iterator content = super.col[0].contentNodes(-1); Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node; kelondroRecords.Node node;
kelondroRow.Entry indexentry; kelondroRow.Entry indexentry;
@ -92,14 +94,15 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
} }
System.out.print(" -ordering- "); System.out.print(" -ordering- ");
System.out.flush(); System.out.flush();
ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
ri.shape(); ri.shape();
return ri; return ri;
} }
private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime) throws IOException { private kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException {
kelondroTree index = new kelondroTree(indexfile, buffersize, preloadTime, 10, rowdef.width(0), 4, true); kelondroTree index = new kelondroTree(indexfile, buffersize, preloadTime, 10,
new kelondroRow("byte[] key-" + rowdef.width(0) + ", int reference-4"),
objectOrder, 2, 80, true);
Iterator content = super.col[0].contentNodes(-1); Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node; kelondroRecords.Node node;
kelondroRow.Entry indexentry; kelondroRow.Entry indexentry;

@ -186,8 +186,9 @@ public class kelondroFlexWidthArray implements kelondroArray {
for (int i = 0; i < size(); i++) { for (int i = 0; i < size(); i++) {
System.out.print("row " + i + ": "); System.out.print("row " + i + ": ");
row = get(i); row = get(i);
for (int j = 0; j < row().columns(); j++) System.out.print(((row.empty(j)) ? "NULL" : row.getColString(j, "UTF-8")) + ", "); System.out.println(row.toString());
System.out.println(); //for (int j = 0; j < row().columns(); j++) System.out.print(((row.empty(j)) ? "NULL" : row.getColString(j, "UTF-8")) + ", ");
//System.out.println();
} }
System.out.println("EndOfTable"); System.out.println("EndOfTable");
} }

@ -60,5 +60,5 @@ public interface kelondroIndex {
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry remove(byte[] key) throws IOException; public kelondroRow.Entry remove(byte[] key) throws IOException;
//public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException; //public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException;
public void close() throws IOException;
} }

@ -130,6 +130,16 @@ public class kelondroRow {
return w; return w;
} }
// renders the row definition as the string forms of all columns, separated by ", ".
// A row without columns yields an empty string instead of failing on row[0].
public String toString() {
    StringBuffer s = new StringBuffer();
    for (int i = 0; i < row.length; i++) {
        if (i > 0) s.append(", ");
        s.append(row[i].toString());
    }
    return s.toString();
}
public Entry newEntry() { public Entry newEntry() {
return new Entry(); return new Entry();
} }
@ -370,7 +380,7 @@ public class kelondroRow {
} }
*/ */
public String toPropertyForm(boolean includeBraces) { public String toPropertyForm(boolean includeBraces, boolean decimalCardinal) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
if (includeBraces) sb.append("{"); if (includeBraces) sb.append("{");
int encoder, cellwidth; int encoder, cellwidth;
@ -395,15 +405,20 @@ public class kelondroRow {
sb.append(','); sb.append(',');
continue; continue;
case kelondroColumn.celltype_cardinal: case kelondroColumn.celltype_cardinal:
if (encoder == kelondroColumn.encoder_b64e) { if (decimalCardinal) {
sb.append(row[i].nickname());
sb.append('=');
sb.append(Long.toString(bytes2long(rowinstance, colstart[i], cellwidth)));
sb.append(',');
continue;
} else if (encoder == kelondroColumn.encoder_b64e) {
sb.append(row[i].nickname()); sb.append(row[i].nickname());
sb.append('='); sb.append('=');
long c = bytes2long(rowinstance, colstart[i], cellwidth); long c = bytes2long(rowinstance, colstart[i], cellwidth);
sb.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(c, cellwidth).getBytes()); sb.append(kelondroBase64Order.enhancedCoder.encodeLongSmart(c, cellwidth).getBytes());
sb.append(','); sb.append(',');
continue; continue;
} } else throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")");
throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")");
} }
} }
if (sb.charAt(sb.length() - 1) == ',') sb.deleteCharAt(sb.length() - 1); // remove ',' at end if (sb.charAt(sb.length() - 1) == ',') sb.deleteCharAt(sb.length() - 1); // remove ',' at end
@ -412,14 +427,7 @@ public class kelondroRow {
} }
public String toString() { public String toString() {
StringBuffer b = new StringBuffer(); return toPropertyForm(true, true);
b.append('{');
for (int i = 0; i < columns(); i++) {
b.append(getColString(i, null));
if (i < columns() - 1) b.append(", ");
}
b.append('}');
return new String(b);
} }
} }

@ -112,23 +112,6 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
} }
} }
/**
 * Flushes this set and then returns the byte array of the superclass.
 * Both steps run while holding the monitor of <code>buffer</code>.
 *
 * @return the result of <code>super.toByteArray()</code> after the flush
 */
public byte[] toByteArray() {
    final byte[] content;
    synchronized (buffer) {
        flush();
        content = super.toByteArray();
    }
    return content;
}
/*
public void add(byte[] a) {
this.add(super.rowdef.newEntry(a));
}
public void add(kelondroRow.Entry a) {
this.put(a);
}
*/
public kelondroRow.Entry get(byte[] key) { public kelondroRow.Entry get(byte[] key) {
long handle = profile.startRead(); long handle = profile.startRead();
kelondroRow.Entry entry = null; kelondroRow.Entry entry = null;

@ -35,7 +35,15 @@ public class kelondroRowCollection {
protected int sortBound; protected int sortBound;
protected kelondroOrder sortOrder; protected kelondroOrder sortOrder;
protected int sortColumn; protected int sortColumn;
// Column indexes into the row definition that exportRow() builds; they are used
// by exportCollection() to write and by the byte[] constructor to read the
// exported form of a collection. The order must match the column order there.
private static final int exp_chunkcount = 0;  // column "size"
private static final int exp_last_read = 1;   // column "lastread", stored as daysSince2000
private static final int exp_last_wrote = 2;  // column "lastwrote", stored as daysSince2000
private static final int exp_order_type = 3;  // column "orderkey": order signature, or "__" if there is no order
private static final int exp_order_col = 4;   // column "ordercol"
private static final int exp_order_bound = 5; // column "orderbound"
private static final int exp_collection = 6;  // column "collection": the chunk cache bytes
public kelondroRowCollection(kelondroRow rowdef) { public kelondroRowCollection(kelondroRow rowdef) {
this(rowdef, 0); this(rowdef, 0);
} }
@ -51,17 +59,72 @@ public class kelondroRowCollection {
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();
} }
public kelondroRowCollection(kelondroRow rowdef, int objectCount, byte[] cache) { public kelondroRowCollection(kelondroRow rowdef, int objectCount, byte[] cache, kelondroOrder sortOrder, int sortColumn, int sortBound) {
this.rowdef = rowdef; this.rowdef = rowdef;
this.chunkcache = cache; this.chunkcache = cache;
this.chunkcount = objectCount; this.chunkcount = objectCount;
this.sortColumn = 0; this.sortColumn = sortColumn;
this.sortOrder = null; this.sortOrder = sortOrder;
this.sortBound = 0; this.sortBound = sortBound;
this.lastTimeRead = System.currentTimeMillis(); this.lastTimeRead = System.currentTimeMillis();
this.lastTimeWrote = System.currentTimeMillis(); this.lastTimeWrote = System.currentTimeMillis();
} }
/**
 * Rebuilds a collection from the byte array layout that exportCollection()
 * writes: a header of exportOverheadSize bytes followed by the chunk cache.
 *
 * The two time stamps are stored as days since 2000, so they are restored
 * with a resolution of one day only. An order signature of "__" stands for
 * "no sort order"; any other signature is looked up in kelondroNaturalOrder
 * first and in kelondroBase64Order second.
 *
 * @param rowdef the row definition of the entries in this collection
 * @param exportedCollectionRowinstance the exported form of a collection
 */
public kelondroRowCollection(kelondroRow rowdef, byte[] exportedCollectionRowinstance) {
    this.rowdef = rowdef;

    // everything behind the fixed-size header belongs to the chunk cache column
    final int payloadLength = exportedCollectionRowinstance.length - exportOverheadSize;
    final kelondroRow.Entry exported = exportRow(payloadLength).newEntry(exportedCollectionRowinstance);

    this.chunkcount = (int) exported.getColLongB256(exp_chunkcount);

    // convert "days since 2000" back to milliseconds since 1970
    this.lastTimeRead = (exported.getColLongB256(exp_last_read) + 10957) * day;
    this.lastTimeWrote = (exported.getColLongB256(exp_last_wrote) + 10957) * day;

    final String signature = exported.getColString(exp_order_type, null);
    kelondroOrder restoredOrder = null;
    if (!signature.equals("__")) {
        restoredOrder = kelondroNaturalOrder.bySignature(signature);
        if (restoredOrder == null) {
            restoredOrder = kelondroBase64Order.bySignature(signature);
        }
    }
    this.sortOrder = restoredOrder;

    this.sortColumn = (int) exported.getColLongB256(exp_order_col);
    this.sortBound = (int) exported.getColLongB256(exp_order_bound);
    this.chunkcache = exported.getColBytes(exp_collection);
}
// number of milliseconds in one day
private static final long day = 24L * 60L * 60L * 1000L;

/**
 * Converts a time stamp into a day count relative to 2000-01-01.
 *
 * @param time a time in milliseconds since 1970-01-01 (as from System.currentTimeMillis())
 * @return the number of whole days of <code>time</code> minus 10957,
 *         the number of days between 1970-01-01 and 2000-01-01
 */
public static int daysSince2000(long time) {
    final int daysSince1970 = (int) (time / day);
    return daysSince1970 - 10957;
}
/**
 * Builds the row definition that describes the exported form of a collection:
 * six header columns followed by one byte[] column for the chunk cache.
 *
 * @param chunkcachelength the width in bytes of the trailing "collection" column
 * @return a row definition with the header columns and the collection column
 */
private kelondroRow exportRow(int chunkcachelength) {
    final String header =
        "int size-4 {b256}," +
        "short lastread-2 {b256}," +   // as daysSince2000
        "short lastwrote-2 {b256}," +  // as daysSince2000
        "byte[] orderkey-2," +
        "short ordercol-2 {b256}," +
        "short orderbound-2 {b256},";
    final String definition = header + "byte[] collection-" + chunkcachelength;
    return new kelondroRow(definition);
}

// sum of the header column widths declared in exportRow(): 4 + 2 + 2 + 2 + 2 + 2
public static final int exportOverheadSize = 14;
/**
 * Serializes this collection into a single byte array using the layout
 * defined by exportRow(): size, last-read day, last-write day, order
 * signature, sort column, sort bound and finally the chunk cache itself.
 *
 * @return the exported bytes, or null if the collection is empty
 */
public byte[] exportCollection() {
    if (size() == 0) {
        // an empty collection has no exported form
        return null;
    }

    // trim() runs before chunkcache.length is used as the width of the collection column
    trim();
    final kelondroRow.Entry exported = exportRow(chunkcache.length).newEntry();

    // "__" marks a collection without a sort order
    final byte[] orderSignature;
    if (this.sortOrder == null) {
        orderSignature = "__".getBytes();
    } else {
        orderSignature = this.sortOrder.signature().getBytes();
    }

    exported.setColLongB256(exp_chunkcount, size());
    exported.setColLongB256(exp_last_read, daysSince2000(this.lastTimeRead));
    exported.setColLongB256(exp_last_wrote, daysSince2000(this.lastTimeWrote));
    exported.setCol(exp_order_type, orderSignature);
    exported.setColLongB256(exp_order_col, this.sortColumn);
    exported.setColLongB256(exp_order_bound, this.sortBound);
    exported.setCol(exp_collection, chunkcache);
    return exported.bytes();
}
public kelondroRow row() { public kelondroRow row() {
return this.rowdef; return this.rowdef;
} }
@ -357,10 +420,6 @@ public class kelondroRowCollection {
while (i.hasNext()) s.append(", " + ((kelondroRow.Entry) i.next()).toString()); while (i.hasNext()) s.append(", " + ((kelondroRow.Entry) i.next()).toString());
return new String(s); return new String(s);
} }
/**
 * Gives access to the chunk cache of this collection.
 * The array is handed out as-is; no copy is made.
 *
 * @return the <code>chunkcache</code> array of this collection
 */
public byte[] toByteArray() {
    return chunkcache;
}
private final int compare(int i, int j) { private final int compare(int i, int j) {
assert (i < chunkcount); assert (i < chunkcount);
@ -385,4 +444,9 @@ public class kelondroRowCollection {
return c; return c;
} }
/**
 * Small manual check of the day arithmetic: prints the date that lies
 * 10957 days after the epoch, the epoch itself, and today's value of
 * daysSince2000().
 *
 * @param args not used
 */
public static void main(String[] args) {
    final java.util.Date dayZero = new java.util.Date(10957 * day);
    final java.util.Date epoch = new java.util.Date(0);
    final int today = daysSince2000(System.currentTimeMillis());
    System.out.println(dayZero);
    System.out.println(epoch);
    System.out.println(today);
}
} }

@ -36,7 +36,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
private kelondroProfile profile; private kelondroProfile profile;
private TreeSet removeMarker; private TreeSet removeMarker;
public kelondroRowSet(kelondroRow rowdef) { public kelondroRowSet(kelondroRow rowdef) {
super(rowdef); super(rowdef);
this.removeMarker = new TreeSet(); this.removeMarker = new TreeSet();
@ -49,6 +49,12 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
this.profile = new kelondroProfile(); this.profile = new kelondroProfile();
} }
/**
 * Creates a row set from the exported form of a collection. The exported
 * bytes are passed to the superclass constructor; the set then starts
 * with a fresh profile and an empty set of remove markers.
 *
 * @param rowdef the row definition of the entries in this set
 * @param exportedCollectionRowinstance the exported form of a collection
 */
public kelondroRowSet(kelondroRow rowdef, byte[] exportedCollectionRowinstance) {
    super(rowdef, exportedCollectionRowinstance);
    this.profile = new kelondroProfile();
    this.removeMarker = new TreeSet();
}
public kelondroRow.Entry get(byte[] key) { public kelondroRow.Entry get(byte[] key) {
return get(key, 0, key.length); return get(key, 0, key.length);
} }
@ -138,6 +144,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
public void shape() { public void shape() {
//System.out.println("SHAPE"); //System.out.println("SHAPE");
if (this.sortOrder == null) return; // we cannot shape without an object order
synchronized (chunkcache) { synchronized (chunkcache) {
resolveMarkedRemoved(); resolveMarkedRemoved();
super.sort(); super.sort();
@ -288,17 +295,17 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
private int compare(byte[] a, int astart, int alength, int chunknumber) { private int compare(byte[] a, int astart, int alength, int chunknumber) {
assert (chunknumber < chunkcount); assert (chunknumber < chunkcount);
int l = Math.min(this.rowdef.width(0), Math.min(a.length - astart, alength)); int l = Math.min(this.rowdef.width(this.sortColumn), Math.min(a.length - astart, alength));
return this.sortOrder.compare(a, astart, l, chunkcache, chunknumber * this.rowdef.objectsize(), l); return this.sortOrder.compare(a, astart, l, chunkcache, chunknumber * this.rowdef.objectsize() + this.rowdef.colstart[this.sortColumn], this.rowdef.width(this.sortColumn));
} }
private boolean match(byte[] a, int astart, int alength, int chunknumber) { private boolean match(byte[] a, int astart, int alength, int chunknumber) {
if (chunknumber >= chunkcount) return false; if (chunknumber >= chunkcount) return false;
int i = 0; int i = 0;
int p = chunknumber * this.rowdef.objectsize(); int p = chunknumber * this.rowdef.objectsize();
final int len = Math.min(this.rowdef.width(0), Math.min(alength, a.length - astart)); final int len = Math.min(this.rowdef.width(this.sortColumn), Math.min(alength, a.length - astart));
while (i < len) if (a[astart + i++] != chunkcache[p++]) return false; while (i < len) if (a[astart + i++] != chunkcache[p++]) return false;
return true; return ((len == this.rowdef.width(this.sortColumn)) || (chunkcache[len] == 0)) ;
} }
public kelondroProfile profile() { public kelondroProfile profile() {
@ -358,6 +365,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
} }
} }
/**
 * Does nothing. The method exists only so that this class satisfies the
 * kelondroIndex interface.
 */
public void close() {
// just for compatibility with kelondroIndex interface; do nothing
}
public static void main(String[] args) { public static void main(String[] args) {
/* /*
String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" };

@ -211,13 +211,13 @@ public class plasmaDHTChunk {
try { try {
lurl = lurls.getEntry(iEntry.urlHash(), iEntry); lurl = lurls.getEntry(iEntry.urlHash(), iEntry);
if ((lurl == null) || (lurl.url() == null)) { if ((lurl == null) || (lurl.url() == null)) {
yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash()); //yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash());
notBoundCounter++; notBoundCounter++;
urlIter.remove(); urlIter.remove();
wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true); wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true);
} else { } else {
urlCache.put(iEntry.urlHash(), lurl); urlCache.put(iEntry.urlHash(), lurl);
yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash()); //yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash());
refcount++; refcount++;
} }
} catch (IOException e) { } catch (IOException e) {

Loading…
Cancel
Save