From 5551ff5306d0535561ec01850e7af69c60c8d58e Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 16 May 2007 14:36:56 +0000 Subject: [PATCH] enhanced index storage data structure kelondroBytesIntMap this now stores two index structures, one for data that is acquired during start-up and one for data that is acquired during run-time. This reduces the growth factor, and should reduce the memory amount in case an index reorganisation happens. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3733 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../anomic/kelondro/kelondroBytesIntMap.java | 219 +++++++++++++++--- .../de/anomic/kelondro/kelondroFlexTable.java | 8 +- .../anomic/kelondro/kelondroIntBytesMap.java | 2 +- 3 files changed, 196 insertions(+), 33 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 7cb5df5ce..cfa27e26e 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -25,53 +25,110 @@ package de.anomic.kelondro; import java.io.IOException; +import java.util.Iterator; public class kelondroBytesIntMap { - private kelondroIndex ki; + private kelondroRow rowdef; + private kelondroIndex index0, index1; public kelondroBytesIntMap(kelondroIndex ki) throws IOException { assert (ki.row().columns() == 2); // must be a key/index relation assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long - this.ki = ki; + this.index0 = null; // not used + this.index1 = ki; + this.rowdef = ki.row(); } public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) { - this.ki = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0), space); + this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", 
kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0); + this.index0 = new kelondroRowSet(rowdef, space); + this.index1 = null; // to show that this is the initialization phase } public kelondroRow row() throws IOException { - return ki.row(); + return index0.row(); } public synchronized int geti(byte[] key) throws IOException { assert (key != null); //assert (!(serverLog.allZero(key))); - kelondroRow.Entry indexentry = ki.get(key); - if (indexentry == null) return -1; - return (int) indexentry.getColLong(1); + if (index0 != null) { + if (index1 == null) { + // finish initialization phase + if (index0 instanceof kelondroRowSet) { + ((kelondroRowSet) index0).sort(); + ((kelondroRowSet) index0).uniq(10000); + } + index1 = new kelondroRowSet(rowdef, 0); + //System.out.println("finished initialization phase at size = " + index0.size() + " in geti"); + } + kelondroRow.Entry indexentry = index0.get(key); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + if (indexentry != null) return (int) indexentry.getColLong(1); + } + if (index1 != null) { + kelondroRow.Entry indexentry = index1.get(key); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + if (indexentry != null) return (int) indexentry.getColLong(1); + } + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return -1; } public synchronized int puti(byte[] key, int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); //assert (!(serverLog.allZero(key))); - kelondroRow.Entry newentry = ki.row().newEntry(); + if (index0 != null) { + if (index1 == null) { + // finish initialization phase + if (index0 instanceof kelondroRowSet) { + ((kelondroRowSet) index0).sort(); + ((kelondroRowSet) index0).uniq(10000); + } + index1 = new kelondroRowSet(rowdef, 0); + //System.out.println("finished initialization phase at size = " + 
index0.size() + " in puti"); + } + // if the new entry is within the initialization part, just overwrite it + kelondroRow.Entry indexentry = index0.get(key); + if (indexentry != null) { + int oldi = (int) indexentry.getColLong(1); + indexentry.setCol(0, key); + indexentry.setCol(1, i); + index0.put(indexentry); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return oldi; + } + // else place it in the index1 + } + // at this point index1 cannot be null + assert (index1 != null); + kelondroRow.Entry newentry = index1.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); - kelondroRow.Entry oldentry = ki.put(newentry); + kelondroRow.Entry oldentry = index1.put(newentry); if (oldentry == null) return -1; + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return (int) oldentry.getColLong(1); } public synchronized void addi(byte[] key, int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); + assert index0 != null; + //assert index1 == null; + if (index1 != null) { + // the initialization phase is over, put this entry to the secondary index + puti(key, i); + return; + } //assert (!(serverLog.allZero(key))); - kelondroRow.Entry newentry = ki.row().newEntry(); + kelondroRow.Entry newentry = this.rowdef.newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); - ki.addUnique(newentry); + index0.addUnique(newentry); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); } public synchronized int removei(byte[] key) throws IOException { @@ -79,45 +136,153 @@ public class kelondroBytesIntMap { //assert (!(serverLog.allZero(key))); // returns the integer index of the key, if the key can be found and was removed // and -1 if the key was not found. 
- if (ki.size() == 0) return -1; - kelondroRow.Entry indexentry = ki.remove(key); + if (index0 != null) { + if (index1 == null) { + // finish initialization phase + if (index0 instanceof kelondroRowSet) { + ((kelondroRowSet) index0).sort(); + ((kelondroRowSet) index0).uniq(10000); + } + index1 = new kelondroRowSet(rowdef, 0); + //System.out.println("finished initialization phase at size = " + index0.size() + " in removei"); + } + // if the new entry is within the initialization part, just overwrite it + kelondroRow.Entry indexentry = index0.remove(key); + if (indexentry != null) { + assert index0.remove(key) == null; // check if remove worked + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return (int) indexentry.getColLong(1); + } + // else remove it from the index1 + } + // at this point index1 cannot be null + assert (index1 != null); + if (index1.size() == 0) return -1; + kelondroRow.Entry indexentry = index1.remove(key); if (indexentry == null) return -1; + assert index1.remove(key) == null; // check if remove worked + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return (int) indexentry.getColLong(1); } public synchronized int removeonei() throws IOException { - if (ki.size() == 0) return -1; - kelondroRow.Entry indexentry = ki.removeOne(); - assert (indexentry != null); - if (indexentry == null) return -1; - return (int) indexentry.getColLong(1); + if ((index1 != null) && (index1.size() != 0)) { + kelondroRow.Entry indexentry = index1.removeOne(); + assert (indexentry != null); + if (indexentry == null) return -1; + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return (int) indexentry.getColLong(1); + } + if ((index0 != null) && (index0.size() != 0)) { + kelondroRow.Entry indexentry = index0.removeOne(); + assert (indexentry != null); + if (indexentry == null) return -1; + //assert consistencyAnalysis0() : "consistency problem: " + 
consistencyAnalysis(); + return (int) indexentry.getColLong(1); + } + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return -1; } public synchronized int size() { - return ki.size(); + if ((index0 != null) && (index1 == null)) { + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index0.size(); + } + if ((index0 == null) && (index1 != null)) { + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index1.size(); + } + assert ((index0 != null) && (index1 != null)); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index0.size() + index1.size(); } public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { // returns the row-iterator of the underlying kelondroIndex // col[0] = key // col[1] = integer as {b265} - return ki.rows(up, firstKey); + if ((index0 != null) && (index1 == null)) { + // finish initialization phase + if (index0 instanceof kelondroRowSet) { + ((kelondroRowSet) index0).sort(); + ((kelondroRowSet) index0).uniq(10000); + } + index1 = new kelondroRowSet(rowdef, 0); + //System.out.println("finished initialization phase at size = " + index0.size() + " in rows"); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index0.rows(up, firstKey); + } + if ((index0 == null) && (index1 != null)) { + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index1.rows(up, firstKey); + } + assert ((index0 != null) && (index1 != null)); + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return new kelondroMergeIterator(index0.rows(up, firstKey), index1.rows(up, firstKey), rowdef.objectOrder, kelondroMergeIterator.simpleMerge, true); } public kelondroProfile profile() { - return ki.profile(); + if (index0 != null) return index0.profile(); + if (index1 != 
null) return index1.profile(); + return null; } public synchronized void close() { - ki.close(); + if (index0 != null) index0.close(); + if (index1 != null) index1.close(); } - - public synchronized void sort() { - if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).sort(); + + public synchronized String consistencyAnalysis() { + String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + singleConsistency((kelondroRowSet) index0)); + String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + singleConsistency((kelondroRowSet) index1)); + String combined = ""; + if ((index0 == null) && (index1 == null)) return "all null"; + if ((index0 != null) && (index1 != null)) { + Iterator i; + try { + i = index0.rows(true, null); + kelondroRow.Entry entry; + while (i.hasNext()) { + entry = (kelondroRow.Entry) i.next(); + if (index1.has(entry.getColBytes(0))) { + combined = combined + ", common = " + new String(entry.getColBytes(0)); + } + } + } catch (IOException e) {} + } + return s0 + ", " + s1 + combined; } - public synchronized void uniq(long time) { - if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).uniq(time); + public synchronized boolean consistencyAnalysis0() { + boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0); + boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? 
true : singleConsistency0((kelondroRowSet) index1); + if (!(s0 && s1)) return false; + if ((index0 == null) && (index1 == null)) return true; + if ((index0 != null) && (index1 != null)) { + Iterator i; + try { + i = index0.rows(true, null); + kelondroRow.Entry entry; + while (i.hasNext()) { + entry = (kelondroRow.Entry) i.next(); + if (index1.has(entry.getColBytes(0))) return false; + } + } catch (IOException e) {} + } + return true; } + private String singleConsistency(kelondroRowSet rs) { + int s = rs.size(); + rs.sort(); + rs.uniq(10000); + if (rs.size() == s) return "set is sound"; else return "set has " + (rs.size() - s) + " double-entries"; + } + private boolean singleConsistency0(kelondroRowSet rs) { + int s = rs.size(); + rs.sort(); + rs.uniq(10000); + return rs.size() == s; + } } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 3e2c65310..fe84a4a35 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -174,10 +174,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } System.out.print(" -ordering- "); System.out.flush(); - ri.sort(); - int sbu = ri.size(); - ri.uniq(10000); - if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size()); + //int sbu = ri.size(); + //if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size()); return ri; } @@ -212,7 +210,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr public synchronized kelondroRow.Entry get(byte[] key) throws IOException { int pos = index.geti(key); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); + assert this.size() == index.size() : 
"content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis(); if (pos < 0) return null; // i may be greater than this.size(), because this table may have deleted entries // the deleted entries are subtracted from the 'real' tablesize, diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index 29b9a0f73..f9d088ccd 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -75,7 +75,7 @@ public class kelondroIntBytesMap { public byte[] putb(int ii, byte[] value) { initPhase = false; - kelondroRow.Entry newentry = index1.row().newEntry(); + kelondroRow.Entry newentry = rowdef.newEntry(); newentry.setCol(0, (long) ii); newentry.setCol(1, value); kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4));