enhanced index storage data structure kelondroBytesIntMap

this now stores two index structures: one for data that is acquired during start-up
and one for data that is acquired during run-time. This reduces the growth factor and should reduce the amount of memory needed when an index reorganisation happens.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3733 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 872eb46cb9
commit 5551ff5306

@ -25,53 +25,110 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Iterator;
public class kelondroBytesIntMap {
// NOTE(review): ki looks like the single pre-change index that this diff replaces with index0/index1 - confirm
private kelondroIndex ki;
// row definition shared by both indexes: a key column and a 4-byte int column
private kelondroRow rowdef;
// index0 is filled during the initialization phase (null if there is none);
// index1 takes the entries written afterwards (null while the initialization phase is still running)
private kelondroIndex index0, index1;
/**
 * Wraps an existing key/int index.
 * The given index becomes index1 and index0 stays null, so this map has no initialization phase.
 *
 * @param ki index whose row has exactly two columns, the second one being 4 bytes wide
 * @throws IOException declared by the signature; NOTE(review): the call that raises it is not visible here
 */
public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
assert (ki.row().columns() == 2); // must be a key/index relation
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
// NOTE(review): the assignment to this.ki looks like the pre-change side of this diff - confirm
this.ki = ki;
this.index0 = null; // not used
this.index1 = ki;
this.rowdef = ki.row();
}
/**
 * Creates an empty map backed by a kelondroRowSet and starts it in the initialization phase:
 * index0 receives the entries, index1 stays null until the first non-add access.
 *
 * @param keylength   width of the key column in bytes
 * @param objectOrder order handed to the row definition
 * @param space       handed to the kelondroRowSet constructor; presumably the initial capacity - confirm
 */
public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) {
// NOTE(review): the assignment to this.ki looks like the pre-change side of this diff - confirm
this.ki = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0), space);
this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0);
this.index0 = new kelondroRowSet(rowdef, space);
this.index1 = null; // to show that this is the initialization phase
}
/**
 * Returns the row definition of this map: a key column followed by a 4-byte int column.
 *
 * @return the row definition that was fixed by the constructor
 * @throws IOException never thrown here; kept in the signature so that existing callers still compile
 */
public kelondroRow row() throws IOException {
    // rowdef is assigned by both constructors, whereas index0 is null for a map
    // that wraps an existing index; dereferencing index0 here would fail in that case
    return rowdef;
}
/**
 * Looks up the int value stored for a key.
 * If the map is still in the initialization phase (index0 set, index1 null), the phase is finished first:
 * index0 is sorted and made unique when it is a kelondroRowSet, and an empty index1 is created.
 * index0 is searched before index1.
 *
 * @param key the key to look up; must not be null
 * @return the stored int, or -1 if neither index holds the key
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized int geti(byte[] key) throws IOException {
assert (key != null);
//assert (!(serverLog.allZero(key)));
// NOTE(review): the next three lines use ki and look like the pre-change side of this diff - confirm
kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
// NOTE(review): 10000 is presumably a time limit for uniq - confirm
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in geti");
}
kelondroRow.Entry indexentry = index0.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry != null) return (int) indexentry.getColLong(1);
}
if (index1 != null) {
kelondroRow.Entry indexentry = index1.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry != null) return (int) indexentry.getColLong(1);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return -1;
}
/**
 * Stores an int value for a key and reports the value that was stored before.
 * If the map is still in the initialization phase, the phase is finished first (sort and uniq of index0,
 * creation of an empty index1). A key that already lives in index0 is overwritten there;
 * every other key is written to index1.
 *
 * @param key the key; must not be null
 * @param i   the value to store; must not be negative
 * @return the int previously stored for the key, or -1 if the key was new
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized int puti(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i;
assert (key != null);
//assert (!(serverLog.allZero(key)));
// NOTE(review): the next line uses ki and looks like the pre-change side of this diff - confirm
kelondroRow.Entry newentry = ki.row().newEntry();
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
// NOTE(review): 10000 is presumably a time limit for uniq - confirm
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in puti");
}
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.get(key);
if (indexentry != null) {
// remember the old value before the fetched entry is reused for the new one
int oldi = (int) indexentry.getColLong(1);
indexentry.setCol(0, key);
indexentry.setCol(1, i);
index0.put(indexentry);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return oldi;
}
// else place it in the index1
}
// at this point index1 cannot be null
assert (index1 != null);
kelondroRow.Entry newentry = index1.row().newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
// NOTE(review): the ki.put line below looks like the pre-change side of this diff - confirm
kelondroRow.Entry oldentry = ki.put(newentry);
kelondroRow.Entry oldentry = index1.put(newentry);
if (oldentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) oldentry.getColLong(1);
}
/**
 * Adds a key/int pair.
 * While the map is in the initialization phase (index1 is still null) the pair is appended to
 * index0 with addUnique; index0 is sorted and made unique later, when the phase is finished by
 * one of the other access methods. Once index1 exists, the call is handed over to puti.
 *
 * @param key the key; must not be null
 * @param i   the value to store; must not be negative
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized void addi(byte[] key, int i) throws IOException {
    assert i >= 0 : "i = " + i;
    assert (key != null);
    if (index1 != null) {
        // the initialization phase is over, put this entry to the secondary index
        puti(key, i);
        return;
    }
    // only checked on the initialization path: a map that wraps an existing index
    // has index0 == null but is served by the puti branch above
    assert index0 != null;
    kelondroRow.Entry newentry = this.rowdef.newEntry();
    newentry.setCol(0, key);
    newentry.setCol(1, i);
    index0.addUnique(newentry);
}
/**
 * Removes a key and reports the int value that was stored for it.
 * If the map is still in the initialization phase, the phase is finished first (sort and uniq of index0,
 * creation of an empty index1). index0 is tried before index1.
 * NOTE(review): a diff hunk boundary cuts this method, so some of its first lines are not visible here.
 *
 * @param key the key to remove
 * @return the int that was stored for the key, or -1 if the key was not found
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized int removei(byte[] key) throws IOException {
@ -79,45 +136,153 @@ public class kelondroBytesIntMap {
//assert (!(serverLog.allZero(key)));
// returns the integer index of the key, if the key can be found and was removed
// and -1 if the key was not found.
// NOTE(review): the next two lines use ki and look like the pre-change side of this diff - confirm
if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.remove(key);
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
// NOTE(review): 10000 is presumably a time limit for uniq - confirm
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in removei");
}
// if the entry is within the initialization part, remove it from there
kelondroRow.Entry indexentry = index0.remove(key);
if (indexentry != null) {
assert index0.remove(key) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
// else remove it from the index1
}
// at this point index1 cannot be null
assert (index1 != null);
if (index1.size() == 0) return -1;
kelondroRow.Entry indexentry = index1.remove(key);
if (indexentry == null) return -1;
assert index1.remove(key) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
/**
 * Removes one entry chosen by the underlying index and reports its int value.
 * A non-empty index1 is served first, then a non-empty index0.
 *
 * @return the int value of the removed entry, or -1 if nothing could be removed
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized int removeonei() throws IOException {
// NOTE(review): the next five lines use ki and look like the pre-change side of this diff - confirm
if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.removeOne();
assert (indexentry != null);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
if ((index1 != null) && (index1.size() != 0)) {
kelondroRow.Entry indexentry = index1.removeOne();
assert (indexentry != null);
// fallback for runs without assertions
if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
if ((index0 != null) && (index0.size() != 0)) {
kelondroRow.Entry indexentry = index0.removeOne();
assert (indexentry != null);
// fallback for runs without assertions
if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return -1;
}
/**
 * Counts the entries of this map.
 *
 * @return the size of the only existing index, or the sum of both sizes when index0 and index1 both exist
 */
public synchronized int size() {
// NOTE(review): the next line uses ki and looks like the pre-change side of this diff - confirm
return ki.size();
if ((index0 != null) && (index1 == null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size();
}
if ((index0 == null) && (index1 != null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.size();
}
assert ((index0 != null) && (index1 != null));
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size() + index1.size();
}
/**
 * Returns an iterator over the rows of this map.
 * If the map is still in the initialization phase, the phase is finished first and the rows of index0
 * are returned. If only index1 exists, its rows are returned. If both exist, the two row iterators
 * are combined by a kelondroMergeIterator that uses the object order of the row definition.
 *
 * @param up       handed to the rows call of the underlying index; presumably the iteration direction - confirm
 * @param firstKey handed to the rows call of the underlying index; presumably the key to start at - confirm
 * @return iterator over rows with col[0] = key and col[1] = int
 * @throws IOException presumably raised by the underlying index access - confirm
 */
public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex
// col[0] = key
// col[1] = integer as {b256}
// NOTE(review): the next line uses ki and looks like the pre-change side of this diff - confirm
return ki.rows(up, firstKey);
if ((index0 != null) && (index1 == null)) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
// NOTE(review): 10000 is presumably a time limit for uniq - confirm
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
// index1 was created empty just above, so index0 alone holds all rows
return index0.rows(up, firstKey);
}
if ((index0 == null) && (index1 != null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.rows(up, firstKey);
}
assert ((index0 != null) && (index1 != null));
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(index0.rows(up, firstKey), index1.rows(up, firstKey), rowdef.objectOrder, kelondroMergeIterator.simpleMerge, true);
}
/**
 * Returns the profile of the underlying index.
 *
 * @return the profile of index0 if index0 exists, otherwise the profile of index1, or null if neither exists
 */
public kelondroProfile profile() {
// NOTE(review): the next line uses ki and looks like the pre-change side of this diff - confirm
return ki.profile();
// only one profile is reported; index1 is consulted only when index0 does not exist
if (index0 != null) return index0.profile();
if (index1 != null) return index1.profile();
return null;
}
/**
 * Closes every index that exists.
 */
public synchronized void close() {
// NOTE(review): the next line uses ki and looks like the pre-change side of this diff - confirm
ki.close();
if (index0 != null) index0.close();
if (index1 != null) index1.close();
}
public synchronized void sort() {
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).sort();
/**
 * Builds a human-readable consistency report for this map.
 * Each index that is a kelondroRowSet is checked for double entries; this check sorts the set
 * and removes the doubles it finds. If both indexes exist, every key of index0 that also occurs
 * in index1 is listed as "common".
 *
 * @return "all null" if no index exists, otherwise the report for index0 and index1 followed by
 *         the common keys; a failed cross-check is mentioned at the end of the report
 */
public synchronized String consistencyAnalysis() {
    if ((index0 == null) && (index1 == null)) return "all null";

    // an index handed in through the wrapping constructor need not be a kelondroRowSet,
    // so the cast is guarded in the same way as in consistencyAnalysis0
    String s0;
    if (index0 == null) {
        s0 = "index0: is NULL";
    } else if (index0 instanceof kelondroRowSet) {
        s0 = "index0: " + singleConsistency((kelondroRowSet) index0);
    } else {
        s0 = "index0: not a kelondroRowSet, not checked";
    }
    String s1;
    if (index1 == null) {
        s1 = "index1: is NULL";
    } else if (index1 instanceof kelondroRowSet) {
        s1 = "index1: " + singleConsistency((kelondroRowSet) index1);
    } else {
        s1 = "index1: not a kelondroRowSet, not checked";
    }

    StringBuffer combined = new StringBuffer();
    if ((index0 != null) && (index1 != null)) {
        try {
            Iterator i = index0.rows(true, null);
            while (i.hasNext()) {
                kelondroRow.Entry entry = (kelondroRow.Entry) i.next();
                byte[] key = entry.getColBytes(0);
                if (index1.has(key)) {
                    combined.append(", common = ").append(new String(key));
                }
            }
        } catch (IOException e) {
            // this is a diagnostic string, so report the failure instead of hiding it
            combined.append(", cross-check aborted: ").append(e);
        }
    }
    return s0 + ", " + s1 + combined;
}
public synchronized void uniq(long time) {
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).uniq(time);
/**
 * Boolean variant of the consistency check.
 * Every index that is a kelondroRowSet must be free of double entries, and if both indexes
 * exist, no key of index0 may occur in index1. The double-entry check sorts the sets and
 * removes doubles as a side effect.
 *
 * @return false if a double entry or a key common to both indexes was found, true otherwise
 */
public synchronized boolean consistencyAnalysis0() {
    // instanceof is false for null, so a missing index counts as sound;
    // both sets are always checked, because the check itself reorganises them
    boolean sound0 = !(index0 instanceof kelondroRowSet) || singleConsistency0((kelondroRowSet) index0);
    boolean sound1 = !(index1 instanceof kelondroRowSet) || singleConsistency0((kelondroRowSet) index1);
    if (!sound0 || !sound1) return false;

    // the cross-check needs both indexes
    if ((index0 == null) || (index1 == null)) return true;
    try {
        for (Iterator rows = index0.rows(true, null); rows.hasNext();) {
            kelondroRow.Entry row = (kelondroRow.Entry) rows.next();
            if (index1.has(row.getColBytes(0))) return false;
        }
    } catch (IOException e) {
        // an unreadable index is not reported as an inconsistency
    }
    return true;
}
/**
 * Checks a single row set for double entries and describes the result.
 * The set is sorted and made unique in the process, so doubles are removed by this call.
 *
 * @param rs the row set to check
 * @return "set is sound" if the size did not change, otherwise a text with the number of
 *         entries that were removed as doubles
 */
private String singleConsistency(kelondroRowSet rs) {
    int sizeBefore = rs.size();
    rs.sort();
    rs.uniq(10000);
    // uniq can only shrink the set, so the count is before minus after
    int doubles = sizeBefore - rs.size();
    if (doubles == 0) return "set is sound";
    return "set has " + doubles + " double-entries";
}
/**
 * Checks a single row set for double entries.
 * The set is sorted and made unique in the process, so doubles are removed by this call.
 *
 * @param rs the row set to check
 * @return true if sorting and uniq left the size of the set unchanged
 */
private boolean singleConsistency0(kelondroRowSet rs) {
    final int sizeBefore = rs.size();
    rs.sort();
    rs.uniq(10000);
    final int sizeAfter = rs.size();
    return sizeBefore == sizeAfter;
}
}

@ -174,10 +174,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
}
System.out.print(" -ordering- ");
System.out.flush();
ri.sort();
int sbu = ri.size();
ri.uniq(10000);
if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
//int sbu = ri.size();
//if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
return ri;
}
@ -212,7 +210,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
int pos = index.geti(key);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis();
if (pos < 0) return null;
// i may be greater than this.size(), because this table may have deleted entries
// the deleted entries are subtracted from the 'real' tablesize,

@ -75,7 +75,7 @@ public class kelondroIntBytesMap {
public byte[] putb(int ii, byte[] value) {
initPhase = false;
kelondroRow.Entry newentry = index1.row().newEntry();
kelondroRow.Entry newentry = rowdef.newEntry();
newentry.setCol(0, (long) ii);
newentry.setCol(1, value);
kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4));

Loading…
Cancel
Save