Enhanced the count feature for kelondroRowSet: counting is now about twice as fast as before, which should cut the time needed for the collection analysis roughly in half.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5698 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 84e37387a2
commit 3e4c28e188

@ -51,7 +51,7 @@ import de.anomic.yacy.dht.FlatWordPartitionScheme;
public class IntegerHandleIndex {
private final Row rowdef;
private ObjectIndex index;
private ObjectIndexCache index;
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0);
@ -134,7 +134,18 @@ public class IntegerHandleIndex {
return (int) oldentry.getColLong(1);
}
public synchronized int add(final byte[] key, int a) throws IOException {
/**
 * Adds {@code a} to the counter stored under {@code key} and returns the resulting value.
 * A freshly built row (key in column 0, {@code a} in column 1) is passed along to the
 * underlying index as the initial row for the increment.
 *
 * @param key the key whose counter is incremented; must not be null
 * @param a the amount to add; must be greater than zero
 * @return the value reported by the underlying index, narrowed to {@code int}
 * @throws IOException declared in the signature for callers
 */
public synchronized int inc(final byte[] key, int a) throws IOException {
    assert key != null;
    assert a > 0; // adding 0 is pointless work; if it happens, the caller has a performance problem
    final Row.Entry initial = this.rowdef.newEntry();
    initial.setCol(0, key);
    initial.setCol(1, a);
    // column 1 is the counter column of the row definition
    return (int) index.inc(key, 1, a, initial);
}
/*
public synchronized int inc(final byte[] key, int a) throws IOException {
assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
@ -146,20 +157,12 @@ public class IntegerHandleIndex {
index.addUnique(newentry);
return 1;
} else {
int i = (int) indexentry.getColLong(1) + a;
indexentry.setCol(1, i);
long l = indexentry.incCol(1, a);
index.put(indexentry);
return i;
return (int) l;
}
}
public synchronized int inc(final byte[] key) throws IOException {
return add(key, 1);
}
public synchronized int dec(final byte[] key) throws IOException {
return add(key, -1);
}
*/
public synchronized void putUnique(final byte[] key, final int i) throws IOException {
assert i >= 0 : "i = " + i;
@ -325,7 +328,7 @@ public class IntegerHandleIndex {
long start = System.currentTimeMillis();
try {
for (int i = 0; i < count; i++) {
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes());
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count / 32)).getBytes(), 1);
}
} catch (IOException e) {
e.printStackTrace();

@ -50,6 +50,7 @@ public interface ObjectIndex {
public void putMultiple(List<Row.Entry> rows) throws IOException; // for R/W head path optimization
public void addUnique(Row.Entry row) throws IOException; // no double-check
public void addUniqueMultiple(List<Row.Entry> rows) throws IOException; // no double-check
//public long inc(final byte[] key, int col, long add, Row.Entry initrow); // replace a column with a recomputed value
public ArrayList<RowCollection> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
public Row.Entry remove(byte[] key) throws IOException;
public Row.Entry removeOne() throws IOException;

@ -87,6 +87,7 @@ public class ObjectIndexCache implements ObjectIndex {
return index1.has(key);
}
/*
public synchronized Row.Entry put(final Row.Entry entry) {
assert (entry != null);
finishInitialization();
@ -100,8 +101,22 @@ public class ObjectIndexCache implements ObjectIndex {
// else place it in the index1
return index1.put(entry);
}
*/
/**
 * Stores {@code entry}, first finishing the initialization phase.
 * If index0 already holds the entry's primary key, the entry is put there;
 * otherwise it is put into index1.
 *
 * @param entry the row to store; must not be null
 * @return whatever the {@code put} call on the chosen index returns
 */
public synchronized Row.Entry put(final Row.Entry entry) {
    assert (entry != null);
    finishInitialization();
    assert index0.isSorted();
    final byte[] primaryKey = entry.getPrimaryKeyBytes();
    // keys known to the initialization part (index0) are overwritten there; all others go to index1
    return index0.has(primaryKey) ? index0.put(entry) : index1.put(entry);
}
public Entry put(final Entry row, final Date entryDate) {
public Entry put(final Entry row, final Date entryDate) {
return put(row);
}
@ -128,7 +143,16 @@ public class ObjectIndexCache implements ObjectIndex {
while (i.hasNext()) addUnique(i.next());
}
public synchronized ArrayList<RowCollection> removeDoubles() {
/**
 * Increments column {@code col} of the row stored under {@code key} by {@code add}.
 * index0 is tried first without an initial row; only if it answers with
 * {@code Long.MIN_VALUE} is the request forwarded to index1 together with {@code initrow}.
 *
 * @param key the primary key of the row; must not be null
 * @param col the column to increment
 * @param add the amount to add
 * @param initrow the row handed to index1 as initial row
 * @return the result from index0, or the result from index1 if index0 returned {@code Long.MIN_VALUE}
 */
public synchronized long inc(final byte[] key, int col, long add, Row.Entry initrow) {
    assert (key != null);
    finishInitialization();
    assert index0.isSorted();
    // no initial row is given to index0, so it is never asked to create a new entry here
    final long fromIndex0 = index0.inc(key, col, add, null);
    if (fromIndex0 == Long.MIN_VALUE) {
        // NOTE(review): presumably Long.MIN_VALUE means "key not present in index0" -- confirm against index0's type
        return index1.inc(key, col, add, initrow);
    }
    return fromIndex0;
}
public synchronized ArrayList<RowCollection> removeDoubles() {
// finish the initialization phase explicitly
index0.sort();
if (index1 == null) {

@ -464,20 +464,20 @@ public final class Row {
}
}
public final void addCol(final int column, long c) {
public final long incCol(final int column, long c) {
int encoder = row[column].encoder;
int colstrt = colstart[column];
int cellwidth = row[column].cellwidth;
long l;
switch (encoder) {
case Column.encoder_b64e:
l = Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
Base64Order.enhancedCoder.encodeLong(l + c, rowinstance, offset, cellwidth);
return;
l = c + Base64Order.enhancedCoder.decodeLong(rowinstance, offset + colstrt, cellwidth);
Base64Order.enhancedCoder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
return l;
case Column.encoder_b256:
l = NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
NaturalOrder.encodeLong(l + c, rowinstance, offset, cellwidth);
return;
l = c + NaturalOrder.decodeLong(rowinstance, offset + colstrt, cellwidth);
NaturalOrder.encodeLong(l, rowinstance, offset + colstrt, cellwidth);
return l;
}
throw new kelondroException("ROW", "addCol did not find appropriate encoding");
}

@ -135,6 +135,25 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
return oldentry;
}
/**
 * Increments column {@code col} of the row with the given key by {@code add}.
 * <ul>
 * <li>Key found: the stored row is incremented in place, written back, and the value
 *     returned by {@code incCol} is returned.</li>
 * <li>Key not found and {@code initrow != null}: {@code initrow} is added as a new row
 *     and its value in column {@code col} is returned.</li>
 * <li>Key not found and {@code initrow == null}: nothing is changed and
 *     {@code Long.MIN_VALUE} is returned.</li>
 * </ul>
 *
 * @param key the primary key to look up
 * @param col the column to increment
 * @param add the amount to add
 * @param initrow the row to insert if the key is missing; may be null
 * @return the resulting column value, or {@code Long.MIN_VALUE} if nothing was done
 */
public synchronized long inc(byte[] key, int col, long add, Row.Entry initrow) {
    final int pos = find(key, 0, key.length);
    if (pos < 0) {
        // the key is unknown
        if (initrow == null) {
            // without an initial row there is nothing to insert; signal that with Long.MIN_VALUE
            return Long.MIN_VALUE;
        }
        super.addUnique(initrow);
        return initrow.getColLong(col);
    }
    // the key exists: update the stored row (fetched without cloning) and write it back
    final Row.Entry existing = get(pos, false);
    final long updated = existing.incCol(col, add);
    set(pos, existing);
    return updated;
}
private synchronized Row.Entry remove(final byte[] a, final int start, final int length) {
final int index = find(a, start, length);
if (index < 0) return null;

@ -439,7 +439,7 @@ public class IndexCollection implements Index {
final RowSet collection = new RowSet(payloadrow, arrayrow);
final int chunkcountInArray = collection.size();
for (int j = 0; j < chunkcountInArray; j++) {
references.inc(collection.get(j, false).getColBytes(0));
references.inc(collection.get(j, false).getColBytes(0), 1);
}
count++;
// write a log

Loading…
Cancel
Save