From 5551ff5306d0535561ec01850e7af69c60c8d58e Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 16 May 2007 14:36:56 +0000
Subject: [PATCH] enhanced index storage data structure kelondroBytesIntMap
 this now stores two index structures, one for data that is acquired during
 start-up and one for data that is acquired during run-time. This reduces the
 grow factor, and should reduce the amount of memory needed in case an
 index reorganisation happens.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3733 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../anomic/kelondro/kelondroBytesIntMap.java  | 219 +++++++++++++++---
 .../de/anomic/kelondro/kelondroFlexTable.java |   8 +-
 .../anomic/kelondro/kelondroIntBytesMap.java  |   2 +-
 3 files changed, 196 insertions(+), 33 deletions(-)

diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java
index 7cb5df5ce..cfa27e26e 100644
--- a/source/de/anomic/kelondro/kelondroBytesIntMap.java
+++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java
@@ -25,53 +25,110 @@
 package de.anomic.kelondro;
 
 import java.io.IOException;
+import java.util.Iterator;
 
 public class kelondroBytesIntMap {
     
-    private kelondroIndex ki;
+    private kelondroRow rowdef;
+    private kelondroIndex index0, index1;
     
     public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
         assert (ki.row().columns() == 2); // must be a key/index relation
         assert (ki.row().width(1) == 4);  // the value must be a b256-encoded int, 4 bytes long
-        this.ki = ki;
+        this.index0 = null; // not used: a map that wraps an existing index has no start-up index
+        this.index1 = ki; // all data lives in the supplied index
+        this.rowdef = ki.row(); // the row definition is taken from the supplied index
     }
     
     public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) {
-        this.ki = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0), space);
+        this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0);
+        this.index0 = new kelondroRowSet(rowdef, space); // start-up index: addi stores entries here while index1 is still null
+        this.index1 = null; // to show that this is the initialization phase; it ends on the first geti/puti/removei/rows call
     }
     
     public kelondroRow row() throws IOException {
-        return ki.row();
+        return rowdef; // index0 is null when this map wraps an existing kelondroIndex; rowdef is set by both constructors
     }
     
     public synchronized int geti(byte[] key) throws IOException {
         assert (key != null);
         //assert (!(serverLog.allZero(key)));
-        kelondroRow.Entry indexentry = ki.get(key);
-        if (indexentry == null) return -1;
-        return (int) indexentry.getColLong(1);
+        if (index0 != null) { // the start-up index is consulted first
+            if (index1 == null) {
+                // finish initialization phase: the first lookup sorts and de-duplicates index0 and opens index1
+                if (index0 instanceof kelondroRowSet) {
+                    ((kelondroRowSet) index0).sort();
+                    ((kelondroRowSet) index0).uniq(10000);
+                }
+                index1 = new kelondroRowSet(rowdef, 0); // empty run-time index; its existence marks the end of the initialization phase
+                //System.out.println("finished initialization phase at size = " + index0.size() + " in geti");
+            }
+            kelondroRow.Entry indexentry = index0.get(key);
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            if (indexentry != null) return (int) indexentry.getColLong(1);
+        }
+        if (index1 != null) { // fall back to the run-time index
+            kelondroRow.Entry indexentry = index1.get(key);
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            if (indexentry != null) return (int) indexentry.getColLong(1);
+        }
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+        return -1; // key found in neither index
     }
     
     public synchronized int puti(byte[] key, int i) throws IOException {
     	assert i >= 0 : "i = " + i;
         assert (key != null);
         //assert (!(serverLog.allZero(key)));
-        kelondroRow.Entry newentry = ki.row().newEntry();
+        if (index0 != null) {
+            if (index1 == null) {
+                // finish initialization phase: sort and de-duplicate index0, then open index1
+                if (index0 instanceof kelondroRowSet) {
+                    ((kelondroRowSet) index0).sort();
+                    ((kelondroRowSet) index0).uniq(10000);
+                }
+                index1 = new kelondroRowSet(rowdef, 0); // empty run-time index
+                //System.out.println("finished initialization phase at size = " + index0.size() + " in puti");
+            }
+            // if the new entry is within the initialization part, just overwrite it
+            kelondroRow.Entry indexentry = index0.get(key);
+            if (indexentry != null) {
+                int oldi = (int) indexentry.getColLong(1); // previous value, returned to the caller
+                indexentry.setCol(0, key);
+                indexentry.setCol(1, i);
+                index0.put(indexentry); // updated in place, so the key is not added to index1
+                //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+                return oldi;
+            }
+            // else place it in the index1
+        }
+        // at this point index1 cannot be null
+        assert (index1 != null);
+        kelondroRow.Entry newentry = index1.row().newEntry();
         newentry.setCol(0, key);
         newentry.setCol(1, i);
-        kelondroRow.Entry oldentry = ki.put(newentry);
+        kelondroRow.Entry oldentry = index1.put(newentry);
         if (oldentry == null) return -1;
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
         return (int) oldentry.getColLong(1);
     }
     
     public synchronized void addi(byte[] key, int i) throws IOException {
     	assert i >= 0 : "i = " + i;
         assert (key != null);
+        if (index1 != null) {
+            // the initialization phase is over (or an existing index is wrapped, where index0 is null): behave like puti
+            puti(key, i);
+            return;
+        }
+        // still in the initialization phase, so the start-up index must exist
+        assert index0 != null;
         //assert (!(serverLog.allZero(key)));
-        kelondroRow.Entry newentry = ki.row().newEntry();
+        kelondroRow.Entry newentry = this.rowdef.newEntry();
         newentry.setCol(0, key);
         newentry.setCol(1, i);
-        ki.addUnique(newentry);
+        index0.addUnique(newentry);
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
     }
     
     public synchronized int removei(byte[] key) throws IOException {
@@ -79,45 +136,153 @@ public class kelondroBytesIntMap {
         //assert (!(serverLog.allZero(key)));
         // returns the integer index of the key, if the key can be found and was removed
         // and -1 if the key was not found.
-        if (ki.size() == 0) return -1;
-        kelondroRow.Entry indexentry = ki.remove(key);
+        if (index0 != null) {
+            if (index1 == null) {
+                // finish initialization phase
+                if (index0 instanceof kelondroRowSet) {
+                    ((kelondroRowSet) index0).sort();
+                    ((kelondroRowSet) index0).uniq(10000);
+                }
+                index1 = new kelondroRowSet(rowdef, 0);
+                //System.out.println("finished initialization phase at size = " + index0.size() + " in removei");
+            }
+            // if the key is within the initialization part, remove it from there
+            kelondroRow.Entry indexentry = index0.remove(key);
+            if (indexentry != null) {
+                assert index0.remove(key) == null; // check if remove worked
+                //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+                return (int) indexentry.getColLong(1);
+            }
+            // else remove it from the index1
+        }
+        // at this point index1 cannot be null
+        assert (index1 != null);
+        if (index1.size() == 0) return -1;
+        kelondroRow.Entry indexentry = index1.remove(key);
         if (indexentry == null) return -1;
+        assert index1.remove(key) == null; // check if remove worked
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
         return (int) indexentry.getColLong(1);
     }
 
     public synchronized int removeonei() throws IOException {
-        if (ki.size() == 0) return -1;
-        kelondroRow.Entry indexentry = ki.removeOne();
-        assert (indexentry != null);
-        if (indexentry == null) return -1;
-        return (int) indexentry.getColLong(1);
+        if ((index1 != null) && (index1.size() != 0)) { // take an entry from the run-time index first
+            kelondroRow.Entry indexentry = index1.removeOne();
+            assert (indexentry != null);
+            if (indexentry == null) return -1;
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return (int) indexentry.getColLong(1);
+        }
+        if ((index0 != null) && (index0.size() != 0)) { // otherwise take one from the start-up index; NOTE(review): unlike geti/puti/removei this does not end the initialization phase
+            kelondroRow.Entry indexentry = index0.removeOne();
+            assert (indexentry != null);
+            if (indexentry == null) return -1;
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return (int) indexentry.getColLong(1);
+        }
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+        return -1; // both indexes are empty or absent
     }
     
     public synchronized int size() {
-        return ki.size();
+        if ((index0 != null) && (index1 == null)) { // initialization phase: only the start-up index exists
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return index0.size(); // NOTE(review): uniq has not run yet in this phase, so duplicates added via addi would presumably be counted
+        }
+        if ((index0 == null) && (index1 != null)) { // map wraps an existing index: only index1 exists
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return index1.size();
+        }
+        assert ((index0 != null) && (index1 != null));
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+        return index0.size() + index1.size(); // run-time phase: entries are spread over both indexes
     }
     
     public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException {
         // returns the row-iterator of the underlying kelondroIndex
         // col[0] = key
         // col[1] = integer as {b265}
-        return ki.rows(up, firstKey);
+        if ((index0 != null) && (index1 == null)) {
+            // finish initialization phase: sort and de-duplicate index0, then open index1
+            if (index0 instanceof kelondroRowSet) {
+                ((kelondroRowSet) index0).sort();
+                ((kelondroRowSet) index0).uniq(10000);
+            }
+            index1 = new kelondroRowSet(rowdef, 0);
+            //System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return index0.rows(up, firstKey); // index1 was just created empty, so index0 alone holds every row
+        }
+        if ((index0 == null) && (index1 != null)) { // map wraps an existing index: iterate over it directly
+            //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+            return index1.rows(up, firstKey);
+        }
+        assert ((index0 != null) && (index1 != null)); // run-time phase: both iterators are merged using rowdef.objectOrder
+        //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
+        return new kelondroMergeIterator(index0.rows(up, firstKey), index1.rows(up, firstKey), rowdef.objectOrder, kelondroMergeIterator.simpleMerge, true);
     }
     
     public kelondroProfile profile() {
-        return ki.profile();
+        if (index0 != null) return index0.profile(); // NOTE(review): when both indexes exist only index0's profile is reported; index1's is ignored
+        if (index1 != null) return index1.profile(); // map wraps an existing index
+        return null; // neither index exists
     }
     
     public synchronized void close() {
-        ki.close();
+        if (index0 != null) index0.close(); // close the start-up index, if this map has one
+        if (index1 != null) index1.close(); // close the run-time (or wrapped) index, if present
     }
-    
-    public synchronized void sort() {
-        if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).sort();
+
+    public synchronized String consistencyAnalysis() { // diagnostic text: soundness of each index plus every key that is present in both
+        String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + ((index0 instanceof kelondroRowSet) ? singleConsistency((kelondroRowSet) index0) : "not a kelondroRowSet, not checked")); // guard the cast: index0 is only declared as kelondroIndex
+        String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + ((index1 instanceof kelondroRowSet) ? singleConsistency((kelondroRowSet) index1) : "not a kelondroRowSet, not checked")); // index1 may be any kelondroIndex handed to the constructor
+        String combined = "";
+        if ((index0 == null) && (index1 == null)) return "all null";
+        if ((index0 != null) && (index1 != null)) {
+            Iterator i;
+            try {
+                i = index0.rows(true, null);
+                kelondroRow.Entry entry;
+                while (i.hasNext()) {
+                    entry = (kelondroRow.Entry) i.next();
+                    if (index1.has(entry.getColBytes(0))) {
+                        combined = combined + ", common = " + new String(entry.getColBytes(0));
+                    }
+                }
+            } catch (IOException e) { combined = combined + ", analysis aborted: " + e.getMessage(); } // report the failure instead of silently dropping it
+        }
+        return s0 + ", " + s1 + combined;
     }
     
-    public synchronized void uniq(long time) {
-        if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).uniq(time);
+    public synchronized boolean consistencyAnalysis0() { // true if no checked row set shrank under uniq and no key is present in both indexes
+        boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0); // an index that is null or not a kelondroRowSet is not checked and counts as sound
+        boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index1);
+        if (!(s0 && s1)) return false;
+        if ((index0 == null) && (index1 == null)) return true;
+        if ((index0 != null) && (index1 != null)) {
+            Iterator i;
+            try {
+                i = index0.rows(true, null);
+                kelondroRow.Entry entry;
+                while (i.hasNext()) {
+                    entry = (kelondroRow.Entry) i.next();
+                    if (index1.has(entry.getColBytes(0))) return false; // the same key is stored in both indexes
+                }
+            } catch (IOException e) {} // NOTE(review): an I/O error is swallowed here and the method then reports true
+        }
+        return true;
     }
     
+    private String singleConsistency(kelondroRowSet rs) { // describes whether rs shrinks under uniq; note that it sorts and uniqs rs as a side effect
+        int s = rs.size(); // size before sort/uniq
+        rs.sort();
+        rs.uniq(10000);
+        if (rs.size() == s) return "set is sound"; else return "set has " + (s - rs.size()) + " double-entries"; // size before minus size after, so the count is positive
+    }
+    private boolean singleConsistency0(kelondroRowSet rs) { // true if sort + uniq leave the size of rs unchanged
+        int s = rs.size(); // size before sort/uniq
+        rs.sort(); // note: the check itself modifies rs (sort and uniq are applied to it)
+        rs.uniq(10000);
+        return rs.size() == s;
+    }
 }
diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java
index 3e2c65310..fe84a4a35 100644
--- a/source/de/anomic/kelondro/kelondroFlexTable.java
+++ b/source/de/anomic/kelondro/kelondroFlexTable.java
@@ -174,10 +174,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
         }
         System.out.print(" -ordering- ");
         System.out.flush();
-        ri.sort();
-        int sbu = ri.size();
-        ri.uniq(10000);        
-        if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
+        // sorting and uniq of the RAM index are no longer done here: kelondroBytesIntMap
+        // performs them itself when its initialization phase ends (first geti/puti/removei/rows call)
         return ri;
     }
     
@@ -212,7 +210,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
     
     public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
 		int pos = index.geti(key);
-		assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
+		assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis();
 		if (pos < 0) return null;
 		// i may be greater than this.size(), because this table may have deleted entries
 		// the deleted entries are subtracted from the 'real' tablesize,
diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java
index 29b9a0f73..f9d088ccd 100644
--- a/source/de/anomic/kelondro/kelondroIntBytesMap.java
+++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java
@@ -75,7 +75,7 @@ public class kelondroIntBytesMap {
     
     public byte[] putb(int ii, byte[] value) {
     	initPhase = false;
-    	kelondroRow.Entry newentry = index1.row().newEntry();
+    	kelondroRow.Entry newentry = rowdef.newEntry();
         newentry.setCol(0, (long) ii);
         newentry.setCol(1, value);
         kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4));