Enhanced the count feature of kelondroRowSet: counting is now about twice as fast as before, which should speed up the collection analysis (roughly half the time).

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5698 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · 3e4c28e188
parent 84e37387a2
commit 3e4c28e188
6 changed files with 72 additions and 25 deletions
--- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java
+++ b/source/de/anomic/kelondro/index/IntegerHandleIndex.java
@ -51,7 +51,7 @@ import de.anomic.yacy.dht.FlatWordPartitionScheme;
 public class IntegerHandleIndex {
    
    private final Row rowdef;
-    private ObjectIndex index;
+    private ObjectIndexCache index;
    
    public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) {
        this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0);
@ -134,7 +134,18 @@ public class IntegerHandleIndex {
        return (int) oldentry.getColLong(1);
    }

-    public synchronized int add(final byte[] key, int a) throws IOException {
+    public synchronized int inc(final byte[] key, int a) throws IOException { // adds a to the counter kept in column 1 for key; returns the value reported by index.inc, narrowed to int
+        assert key != null;
+        assert a > 0; // it does not make sense to add 0. If this occurs, it is a performance issue
+
+        final Row.Entry newentry = this.rowdef.newEntry(); // row carrying (key, a); handed to index.inc as its initrow argument
+        newentry.setCol(0, key);
+        newentry.setCol(1, a);
+        long l = index.inc(key, 1, a, newentry); // column 1 is the counter column; presumably newentry is stored when key is not yet present -- confirm against the index implementation
+        return (int) l; // the long result is cast down to int
+    }
+    /*
+    public synchronized int inc(final byte[] key, int a) throws IOException {
        assert key != null;
        assert a > 0; // it does not make sense to add 0. If this occurs, it is a performance issue

@ -146,20 +157,12 @@ public class IntegerHandleIndex {
            index.addUnique(newentry);
            return 1;
        } else {
-            int i = (int) indexentry.getColLong(1) + a;
-            indexentry.setCol(1, i);
+            long l = indexentry.incCol(1, a);
            index.put(indexentry);
-            return i;
+            return (int) l;
        }
    }
-    
-    public synchronized int inc(final byte[] key) throws IOException {
-        return add(key, 1);
-    }
-    
-    public synchronized int dec(final byte[] key) throws IOException {
-        return add(key, -1);
-    }
+    */
    
    public synchronized void putUnique(final byte[] key, final int i) throws IOException {
        assert i >= 0 : "i = " + i;
@ -325,7 +328,7 @@ public class IntegerHandleIndex {
        long start = System.currentTimeMillis();
        try {
            for (int i = 0; i < count; i++) {
-                idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes());
+                idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes(), 1);
            }
        } catch (IOException e) {
            e.printStackTrace();
--- a/source/de/anomic/kelondro/index/ObjectIndex.java
+++ b/source/de/anomic/kelondro/index/ObjectIndex.java
@ -50,6 +50,7 @@ public interface ObjectIndex {
    public void putMultiple(List<Row.Entry> rows) throws IOException; // for R/W head path optimization
    public void addUnique(Row.Entry row) throws IOException; // no double-check
    public void addUniqueMultiple(List<Row.Entry> rows) throws IOException; // no double-check
+    //public long inc(final byte[] key, int col, long add, Row.Entry initrow); // replace a column with a recomputed value
    public ArrayList<RowCollection> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
    public Row.Entry remove(byte[] key) throws IOException;
    public Row.Entry removeOne() throws IOException;
--- a/source/de/anomic/kelondro/index/ObjectIndexCache.java
+++ b/source/de/anomic/kelondro/index/ObjectIndexCache.java
@ -87,6 +87,7 @@ public class ObjectIndexCache implements ObjectIndex {
        return index1.has(key);
 	}
    
+	/*
    public synchronized Row.Entry put(final Row.Entry entry) {
    	assert (entry != null);
    	finishInitialization();
@ -100,8 +101,22 @@ public class ObjectIndexCache implements ObjectIndex {
        // else place it in the index1
        return index1.put(entry);
    }
+    */
+    public synchronized Row.Entry put(final Row.Entry entry) { // stores entry in index0 when its primary key is already there, otherwise in index1; returns whatever the chosen index's put returns
+        assert (entry != null);
+        finishInitialization();
+        // if the new entry is within the initialization part, just overwrite it
+        assert index0.isSorted();
+        byte[] key = entry.getPrimaryKeyBytes(); // primary key used for the membership test on index0
+        if (index0.has(key)) {
+            // replace the entry
+            return index0.put(entry);
+        }
+        // else place it in the index1
+        return index1.put(entry);
+    }
    
-	public Entry put(final Entry row, final Date entryDate) {
+    public Entry put(final Entry row, final Date entryDate) {
 		return put(row);
 	}
 	
@ -128,7 +143,16 @@ public class ObjectIndexCache implements ObjectIndex {
 		while (i.hasNext()) addUnique(i.next());
 	}
 	
-	public synchronized ArrayList<RowCollection> removeDoubles() {
+	public synchronized long inc(final byte[] key, int col, long add, Row.Entry initrow) { // increments column col for key by add, trying index0 first and falling back to index1
+        assert (key != null);
+        finishInitialization();
+        assert index0.isSorted();
+        long l = index0.inc(key, col, add, null); // null initrow: presumably index0 only updates an existing row and never creates one -- confirm in the index0 implementation
+        if (l != Long.MIN_VALUE) return l; // Long.MIN_VALUE is treated as "not handled by index0"; any other value is the result
+        return index1.inc(key, col, add, initrow); // index1 receives the caller's initrow
+    }    
+    
+    public synchronized ArrayList<RowCollection> removeDoubles() {
 	    // finish initialization phase explicitly
        index0.sort();
 	    if (index1 == null) {
--- a/source/de/anomic/kelondro/index/Row.java
+++ b/source/de/anomic/kelondro/index/Row.java
@ -464,20 +464,20 @@ public final class Row {
            }
        }
        
-        public final void addCol(final int column, long c) {
+        public final long incCol(final int column, long c) {
            int encoder = row[column].encoder;
            int colstrt = colstart[column];
            int cellwidth = row[column].cellwidth;
            long l;
            switch (encoder) {
            case Column.encoder_b64e:
-                l = Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
-                Base64Order.enhancedCoder.encodeLong(l + c, rowinstance, offset, cellwidth);
-                return;
+                l = c + Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
+                Base64Order.enhancedCoder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
+                return l;
            case Column.encoder_b256:
-                l = NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
-                NaturalOrder.encodeLong(l + c, rowinstance, offset, cellwidth);
-                return;
+                l = c + NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
+                NaturalOrder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
+                return l;
            }
            throw new kelondroException("ROW", "addCol did not find appropriate encoding");
        }
--- a/source/de/anomic/kelondro/index/RowSet.java
+++ b/source/de/anomic/kelondro/index/RowSet.java
@ -135,6 +135,25 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
        return oldentry;
    }

+    public synchronized long inc(byte[] key, int col, long add, Row.Entry initrow) { // adds add to column col of the row found for key; if none is found, inserts initrow when given, else returns Long.MIN_VALUE
+        final int index = find(key, 0, key.length); // a non-negative result is used as the position of the existing row
+        if (index >= 0) {
+            // the entry existed before
+            final Row.Entry entry = get(index, false); // no clone necessary
+            long l = entry.incCol(col, add); // incCol returns the value after the addition
+            set(index, entry); // write the updated entry back at the same position
+            return l;
+        } else if (initrow != null) {
+            // create new entry
+            super.addUnique(initrow);
+            return initrow.getColLong(col); // the result is the value initrow holds in column col
+        } else {
+            // if initrow == null just do nothing
+            // but return a Long.MIN_VALUE
+            return Long.MIN_VALUE;
+        }
+    }
+    
    private synchronized Row.Entry remove(final byte[] a, final int start, final int length) {
        final int index = find(a, start, length);
        if (index < 0) return null;
--- a/source/de/anomic/kelondro/text/IndexCollection.java
+++ b/source/de/anomic/kelondro/text/IndexCollection.java
@ -439,7 +439,7 @@ public class IndexCollection implements Index {
                final RowSet collection = new RowSet(payloadrow, arrayrow);
                final int chunkcountInArray = collection.size();
                for (int j = 0; j < chunkcountInArray; j++) {
-                    references.inc(collection.get(j, false).getColBytes(0));
+                    references.inc(collection.get(j, false).getColBytes(0), 1);
                }
                count++;
                // write a log