- added kelondroTree index option to kelondroFlexTable

- automatic generation of index file when index is too large for RAM


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2261 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent dd2865178a
commit 6af70febef

@ -187,7 +187,7 @@ public class dbtest {
}
if (dbe.equals("kelondroFlexTable")) {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), testRow, true);
table = new kelondroFlexTable(tablepath, new File(tablename).getName(), buffer, testRow, true);
}
if (dbe.equals("mysql")) {
table = new dbTable("mysql", testRow);

@ -24,45 +24,42 @@
package de.anomic.kelondro;
public class kelondroBytesIntMap extends kelondroRowBufferedSet {
import java.io.IOException;
public class kelondroBytesIntMap {
private kelondroIndex ki;
public kelondroBytesIntMap(int keySize, int initSize) {
super(new kelondroRow(new int[]{keySize, 4}), initSize);
// initialize ordering
super.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
/**
 * Creates a key-to-int map on top of an existing index; the index is
 * stored by reference in {@code ki} and is not copied.
 *
 * The two assertions below (evaluated only when Java assertions are enabled)
 * check the layout of the index rows: exactly two columns, with column 1
 * being 4 bytes wide.
 *
 * @param ki the backing index holding key/int rows
 * @throws IOException declared by the signature; nothing in this body
 *         visibly throws it - presumably raised by the index accessors,
 *         TODO confirm against kelondroIndex
 */
public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
assert (ki.row().columns() == 2); // must be a key/index relation
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
this.ki = ki;
}
public int geti(byte[] key) {
kelondroRow.Entry indexentry = super.get(key);
public int geti(byte[] key) throws IOException {
kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1);
}
public int puti(byte[] key, int i) {
kelondroRow.Entry newentry = rowdef.newEntry();
public int puti(byte[] key, int i) throws IOException {
kelondroRow.Entry newentry = ki.row().newEntry();
newentry.setCol(0, key);
newentry.setColLongB256(1, i);
kelondroRow.Entry oldentry = super.put(newentry);
kelondroRow.Entry oldentry = ki.put(newentry);
if (oldentry == null) return -1;
return (int) oldentry.getColLongB256(1);
}
public void addi(byte[] key, int i) {
kelondroRow.Entry indexentry = rowdef.newEntry();
indexentry.setCol(0, key);
indexentry.setColLongB256(1, i);
add(indexentry);
}
public int removei(byte[] key) {
if (size() == 0) {
if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim();
return -1;
}
kelondroRow.Entry indexentry = removeMarked(key);
public int removei(byte[] key) throws IOException {
if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.remove(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1);
}
/**
 * Returns the number of entries as reported by the wrapped index.
 *
 * @return the value of {@code ki.size()}
 * @throws IOException declared by the signature; presumably propagated
 *         from the wrapped index - TODO confirm
 */
public int size() throws IOException {
return ki.size();
}
}

@ -33,24 +33,58 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroBytesIntMap index;
public kelondroFlexTable(File path, String tablename, kelondroRow rowdef, boolean exitOnFail) throws IOException {
public kelondroFlexTable(File path, String tablename, long buffersize, kelondroRow rowdef, boolean exitOnFail) throws IOException {
super(path, tablename, rowdef, exitOnFail);
// fill the index
this.index = new kelondroBytesIntMap(super.row().width(0), 0);
/*
kelondroFixedWidthArray indexArray = new kelondroFixedWidthArray(new File(path, colfilename(0,0)));
for (int i = 0; i < indexArray.size(); i++) index.put(indexArray.get(i).getColBytes(0), new Integer(i));
indexArray.close();
*/
System.out.print("*** Loading " + path);
File newpath = new File(path, tablename + ".table");
File indexfile = new File(newpath, "col.000.index");
kelondroIndex ki = null;
String description = new String(this.col[0].getDescription());
System.out.println("*** Last Startup time: " + description.substring(4));
long start = System.currentTimeMillis();
if (indexfile.exists()) {
// use existing index file
System.out.println("*** Using File index " + indexfile);
ki = new kelondroTree(indexfile, buffersize, 10);
} else if (size() > 100000) {
// generate new index file
System.out.print("*** Genrating File index for " + size() + " entries from " + indexfile);
ki = initializeTreeIndex(indexfile, buffersize);
System.out.println(" -done-");
System.out.println(ki.size()
+ " entries indexed from "
+ super.col[0].size() + " keys.");
} else {
// fill the index
System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath);
ki = initializeRamIndex();
System.out.println(" -done-");
System.out.println(ki.size()
+ " index entries initialized and sorted from "
+ super.col[0].size() + " keys.");
}
// assign index to wrapper
index = new kelondroBytesIntMap(ki);
description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";";
super.col[0].setDescription(description.getBytes());
}
private kelondroIndex initializeRamIndex() throws IOException {
kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0);
ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
Iterator content = super.col[0].contentNodes();
kelondroRecords.Node node;
kelondroRow.Entry indexentry;
int i;
while (content.hasNext()) {
node = (kelondroRecords.Node) content.next();
i = node.handle().hashCode();
index.addi(node.getValueRow(), i);
indexentry = ri.rowdef.newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i);
ri.add(indexentry);
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
@ -58,33 +92,36 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
System.out.print(" -ordering- ");
System.out.flush();
this.index.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
index.shape();
System.out.println(" -done-");
System.out.println(index.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys.");
ri.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
ri.shape();
return ri;
}
/*
private final static byte[] read(File source) throws IOException {
byte[] buffer = new byte[(int) source.length()];
InputStream fis = null;
try {
fis = new FileInputStream(source);
int p = 0, c;
while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c;
} finally {
if (fis != null) try { fis.close(); } catch (Exception e) {}
private kelondroIndex initializeTreeIndex(File indexfile, long buffersize) throws IOException {
kelondroTree index = new kelondroTree(indexfile, buffersize, 10, rowdef.width(0), 4, true);
Iterator content = super.col[0].contentNodes();
kelondroRecords.Node node;
kelondroRow.Entry indexentry;
int i;
while (content.hasNext()) {
node = (kelondroRecords.Node) content.next();
i = node.handle().hashCode();
indexentry = index.row().newEntry();
indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i);
index.put(indexentry);
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
}
}
return buffer;
return index;
}
*/
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
synchronized (index) {
int i = index.geti(key);
if (i >= this.size()) {
System.out.println("errror");
}
if (i >= this.size()) System.out.println("error");
if (i < 0) return null;
return super.get(i);
}

@ -24,7 +24,7 @@
package de.anomic.kelondro;
import java.util.Random;
//import java.util.Random;
public class kelondroIntBytesMap extends kelondroRowBufferedSet {
@ -70,7 +70,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public static void main(String[] args) {
long start = System.currentTimeMillis();
kelondroIntBytesMap c = new kelondroIntBytesMap(30, 0);
Random random = new Random(0);
//Random random = new Random(0);
int x;
for (int i = 0; i < 100000; i++) {
//x = random.nextInt(100000);

@ -462,7 +462,7 @@ public class kelondroRecords {
public String cacheNodeStatusString() {
return
"cacheMaxSize=" + cacheSize +
", cacheCurrSize=" + cacheHeaders.size() +
", cacheCurrSize=" + ((cacheHeaders == null) ? 0 : cacheHeaders.size()) +
", readHit=" + readHit +
", readMiss=" + readMiss +
", writeUnique=" + writeUnique +

@ -62,6 +62,10 @@ public class kelondroRowCollection {
this.lastTimeWrote = System.currentTimeMillis();
}
/**
 * Returns the row definition of this collection.
 *
 * @return the {@code rowdef} field of this instance (the same object, not a copy)
 */
public kelondroRow row() {
return this.rowdef;
}
private final void ensureSize(int elements) {
int needed = elements * rowdef.objectsize();
if (chunkcache.length >= needed) return;

@ -28,7 +28,7 @@ import java.util.TreeSet;
import java.util.Iterator;
import java.util.Random;
public class kelondroRowSet extends kelondroRowCollection {
public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex {
private static final int collectionReSortLimit = 90;
private static final int removeMaxSize = 100;
@ -87,6 +87,10 @@ public class kelondroRowSet extends kelondroRowCollection {
return super.size() - removeMarker.size();
}
/**
 * Removes the entry for the given key by delegating to
 * {@link #removeMarked(byte[])}.
 *
 * NOTE(review): presumably present to satisfy the kelondroIndex interface
 * that this class implements - confirm against the interface.
 *
 * @param a the key bytes
 * @return whatever {@code removeMarked(a)} returns
 */
public kelondroRow.Entry remove(byte[] a) {
return removeMarked(a);
}
/**
 * Convenience overload that uses the whole array as the key: forwards to
 * {@code removeMarked(a, 0, a.length)}.
 *
 * @param a the key bytes; must not be null because {@code a.length} is read
 * @return the result of the three-argument {@code removeMarked}
 */
public kelondroRow.Entry removeMarked(byte[] a) {
return removeMarked(a, 0, a.length);
}

Loading…
Cancel
Save