enhanced index storage data structure kelondroBytesIntMap

This now stores two index structures: one for data that is acquired during start-up
and one for data that is acquired during run-time. This reduces the growth factor and should reduce the amount of memory needed when an index reorganisation happens.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3733 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 872eb46cb9
commit 5551ff5306

@ -25,53 +25,110 @@
package de.anomic.kelondro; package de.anomic.kelondro;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
public class kelondroBytesIntMap { public class kelondroBytesIntMap {
private kelondroIndex ki; private kelondroRow rowdef;
private kelondroIndex index0, index1;
public kelondroBytesIntMap(kelondroIndex ki) throws IOException { public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
assert (ki.row().columns() == 2); // must be a key/index relation assert (ki.row().columns() == 2); // must be a key/index relation
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
this.ki = ki; this.index0 = null; // not used
this.index1 = ki;
this.rowdef = ki.row();
} }
public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) { public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) {
this.ki = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0), space); this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0);
this.index0 = new kelondroRowSet(rowdef, space);
this.index1 = null; // to show that this is the initialization phase
} }
public kelondroRow row() throws IOException { public kelondroRow row() throws IOException {
return ki.row(); return index0.row();
} }
public synchronized int geti(byte[] key) throws IOException { public synchronized int geti(byte[] key) throws IOException {
assert (key != null); assert (key != null);
//assert (!(serverLog.allZero(key))); //assert (!(serverLog.allZero(key)));
kelondroRow.Entry indexentry = ki.get(key); if (index0 != null) {
if (indexentry == null) return -1; if (index1 == null) {
return (int) indexentry.getColLong(1); // finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in geti");
}
kelondroRow.Entry indexentry = index0.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry != null) return (int) indexentry.getColLong(1);
}
if (index1 != null) {
kelondroRow.Entry indexentry = index1.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry != null) return (int) indexentry.getColLong(1);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return -1;
} }
public synchronized int puti(byte[] key, int i) throws IOException { public synchronized int puti(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i; assert i >= 0 : "i = " + i;
assert (key != null); assert (key != null);
//assert (!(serverLog.allZero(key))); //assert (!(serverLog.allZero(key)));
kelondroRow.Entry newentry = ki.row().newEntry(); if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in puti");
}
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.get(key);
if (indexentry != null) {
int oldi = (int) indexentry.getColLong(1);
indexentry.setCol(0, key);
indexentry.setCol(1, i);
index0.put(indexentry);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return oldi;
}
// else place it in the index1
}
// at this point index1 cannot be null
assert (index1 != null);
kelondroRow.Entry newentry = index1.row().newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, i); newentry.setCol(1, i);
kelondroRow.Entry oldentry = ki.put(newentry); kelondroRow.Entry oldentry = index1.put(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) oldentry.getColLong(1); return (int) oldentry.getColLong(1);
} }
public synchronized void addi(byte[] key, int i) throws IOException { public synchronized void addi(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i; assert i >= 0 : "i = " + i;
assert (key != null); assert (key != null);
assert index0 != null;
//assert index1 == null;
if (index1 != null) {
// the initialization phase is over, put this entry to the secondary index
puti(key, i);
return;
}
//assert (!(serverLog.allZero(key))); //assert (!(serverLog.allZero(key)));
kelondroRow.Entry newentry = ki.row().newEntry(); kelondroRow.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, i); newentry.setCol(1, i);
ki.addUnique(newentry); index0.addUnique(newentry);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
} }
public synchronized int removei(byte[] key) throws IOException { public synchronized int removei(byte[] key) throws IOException {
@ -79,45 +136,153 @@ public class kelondroBytesIntMap {
//assert (!(serverLog.allZero(key))); //assert (!(serverLog.allZero(key)));
// returns the integer index of the key, if the key can be found and was removed // returns the integer index of the key, if the key can be found and was removed
// and -1 if the key was not found. // and -1 if the key was not found.
if (ki.size() == 0) return -1; if (index0 != null) {
kelondroRow.Entry indexentry = ki.remove(key); if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in removei");
}
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.remove(key);
if (indexentry != null) {
assert index0.remove(key) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
// else remove it from the index1
}
// at this point index1 cannot be null
assert (index1 != null);
if (index1.size() == 0) return -1;
kelondroRow.Entry indexentry = index1.remove(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
assert index1.remove(key) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1); return (int) indexentry.getColLong(1);
} }
public synchronized int removeonei() throws IOException { public synchronized int removeonei() throws IOException {
if (ki.size() == 0) return -1; if ((index1 != null) && (index1.size() != 0)) {
kelondroRow.Entry indexentry = ki.removeOne(); kelondroRow.Entry indexentry = index1.removeOne();
assert (indexentry != null); assert (indexentry != null);
if (indexentry == null) return -1; if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1); return (int) indexentry.getColLong(1);
} }
if ((index0 != null) && (index0.size() != 0)) {
kelondroRow.Entry indexentry = index0.removeOne();
assert (indexentry != null);
if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return -1;
}
public synchronized int size() { public synchronized int size() {
return ki.size(); if ((index0 != null) && (index1 == null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size();
}
if ((index0 == null) && (index1 != null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.size();
}
assert ((index0 != null) && (index1 != null));
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size() + index1.size();
} }
public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex // returns the row-iterator of the underlying kelondroIndex
// col[0] = key // col[0] = key
// col[1] = integer as {b256} // col[1] = integer as {b256}
return ki.rows(up, firstKey); if ((index0 != null) && (index1 == null)) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq(10000);
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.rows(up, firstKey);
}
if ((index0 == null) && (index1 != null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.rows(up, firstKey);
}
assert ((index0 != null) && (index1 != null));
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(index0.rows(up, firstKey), index1.rows(up, firstKey), rowdef.objectOrder, kelondroMergeIterator.simpleMerge, true);
} }
public kelondroProfile profile() { public kelondroProfile profile() {
return ki.profile(); if (index0 != null) return index0.profile();
if (index1 != null) return index1.profile();
return null;
} }
public synchronized void close() { public synchronized void close() {
ki.close(); if (index0 != null) index0.close();
if (index1 != null) index1.close();
} }
public synchronized void sort() { public synchronized String consistencyAnalysis() {
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).sort(); String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + singleConsistency((kelondroRowSet) index0));
String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + singleConsistency((kelondroRowSet) index1));
String combined = "";
if ((index0 == null) && (index1 == null)) return "all null";
if ((index0 != null) && (index1 != null)) {
Iterator i;
try {
i = index0.rows(true, null);
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next();
if (index1.has(entry.getColBytes(0))) {
combined = combined + ", common = " + new String(entry.getColBytes(0));
}
}
} catch (IOException e) {}
}
return s0 + ", " + s1 + combined;
} }
public synchronized void uniq(long time) { public synchronized boolean consistencyAnalysis0() {
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).uniq(time); boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0);
boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index1);
if (!(s0 && s1)) return false;
if ((index0 == null) && (index1 == null)) return true;
if ((index0 != null) && (index1 != null)) {
Iterator i;
try {
i = index0.rows(true, null);
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = (kelondroRow.Entry) i.next();
if (index1.has(entry.getColBytes(0))) return false;
}
} catch (IOException e) {}
}
return true;
} }
private String singleConsistency(kelondroRowSet rs) {
int s = rs.size();
rs.sort();
rs.uniq(10000);
if (rs.size() == s) return "set is sound"; else return "set has " + (rs.size() - s) + " double-entries";
}
private boolean singleConsistency0(kelondroRowSet rs) {
int s = rs.size();
rs.sort();
rs.uniq(10000);
return rs.size() == s;
}
} }

@ -174,10 +174,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
} }
System.out.print(" -ordering- "); System.out.print(" -ordering- ");
System.out.flush(); System.out.flush();
ri.sort(); //int sbu = ri.size();
int sbu = ri.size(); //if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
ri.uniq(10000);
if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
return ri; return ri;
} }
@ -212,7 +210,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
public synchronized kelondroRow.Entry get(byte[] key) throws IOException { public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
int pos = index.geti(key); int pos = index.geti(key);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis();
if (pos < 0) return null; if (pos < 0) return null;
// i may be greater than this.size(), because this table may have deleted entries // i may be greater than this.size(), because this table may have deleted entries
// the deleted entries are subtracted from the 'real' tablesize, // the deleted entries are subtracted from the 'real' tablesize,

@ -75,7 +75,7 @@ public class kelondroIntBytesMap {
public byte[] putb(int ii, byte[] value) { public byte[] putb(int ii, byte[] value) {
initPhase = false; initPhase = false;
kelondroRow.Entry newentry = index1.row().newEntry(); kelondroRow.Entry newentry = rowdef.newEntry();
newentry.setCol(0, (long) ii); newentry.setCol(0, (long) ii);
newentry.setCol(1, value); newentry.setCol(1, value);
kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4)); kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4));

Loading…
Cancel
Save