Added the new class RowSetArray, which arranges RowSet objects like elements in a hashtable but still provides sorted enumeration. The new class is now integrated into ObjectIndexCache, which is the core class that provides index functions to all database files. The new index access is about twice as fast as before. This has a strong speed-enhancing effect on all parts of YaCy.

The speed of the kelondro indexing class ObjectIndexCache can be compared with Java's standard TreeMap using the main method in IntegerHandleIndex. The result is that the kelondro indexing needs only 1/5 of the memory that TreeMap uses! In exchange, the kelondro classes are about four (!) times slower than TreeMap. However, this is not so bad, because the better use of memory is a strong advantage and makes it possible for YaCy to maintain such a large number of documents (> 50 million) in one peer.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5705 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 0a2fabeef3
commit f6d989aa04

@ -85,7 +85,7 @@ public class Balancer {
try {
final Iterator<byte[]> i = urlFileIndex.keys(true, null);
byte[] hash;
while (i.hasNext()) {
while (i != null && i.hasNext()) {
hash = i.next();
pushHashToDomainStacks(new String(hash), true);
}

@ -33,9 +33,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@ -350,7 +350,7 @@ public class IntegerHandleIndex {
Integer d;
System.gc(); // for resource measurement
a = MemoryControl.available();
HashMap<String, Integer> hm = new HashMap<String, Integer>(0);
TreeMap<String, Integer> hm = new TreeMap<String, Integer>();
for (int i = 0; i < count; i++) {
hash = FlatWordPartitionScheme.positionToHash(r.nextInt(count));
d = hm.get(hash);
@ -364,7 +364,7 @@ public class IntegerHandleIndex {
System.out.println("Used Memory: " + memj + " bytes");
System.out.println("x " + hm.get(FlatWordPartitionScheme.positionToHash(0)));
System.out.println("Geschwindigkeitsfaktor j/k: " + (timej / timek));
System.out.println("Speicherfaktor j/k: " + (memj / memk));
System.out.println("Speicherplatzfaktor j/k: " + (memj / memk));
System.exit(0);
}

@ -24,7 +24,6 @@
package de.anomic.kelondro.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -36,9 +35,10 @@ import de.anomic.kelondro.order.StackIterator;
public class ObjectIndexCache implements ObjectIndex {
private static final int spread = 1000;
private final Row rowdef;
private RowSet index0;
private RowSet index1;
private RowSetArray index1;
private final Row.EntryComparator entryComparator;
public ObjectIndexCache(final Row rowdef, final int initialspace) {
@ -66,7 +66,7 @@ public class ObjectIndexCache implements ObjectIndex {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new RowSet(rowdef, 0);
index1 = new RowSetArray(rowdef, 0, spread);
}
}
@ -166,7 +166,6 @@ public class ObjectIndexCache implements ObjectIndex {
if (index1 == null) {
return index0.removeDoubles();
}
index1.sort();
ArrayList<RowCollection> d0 = index0.removeDoubles();
ArrayList<RowCollection> d1 = index1.removeDoubles();
d0.addAll(d1);
@ -214,7 +213,7 @@ public class ObjectIndexCache implements ObjectIndex {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new RowSet(rowdef, 0);
index1 = new RowSetArray(rowdef, 0, spread);
return index0.keys(up, firstKey);
}
assert (index1 != null);
@ -224,11 +223,14 @@ public class ObjectIndexCache implements ObjectIndex {
}
// index0 should be sorted
// sort index1 to enable working of the merge iterator
index1.sort();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
CloneableIterator<byte[]> k0 = index0.keys(up, firstKey);
CloneableIterator<byte[]> k1 = index1.keys(up, firstKey);
if (k0 == null) return k1;
if (k1 == null) return k0;
return new MergeIterator<byte[]>(
index0.keys(up, firstKey),
index1.keys(up, firstKey),
k0,
k1,
rowdef.objectOrder,
MergeIterator.simpleMerge,
true);
@ -240,7 +242,7 @@ public class ObjectIndexCache implements ObjectIndex {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new RowSet(rowdef, 0);
index1 = new RowSetArray(rowdef, 0, spread);
return index0.rows(up, firstKey);
}
assert (index1 != null);
@ -250,23 +252,27 @@ public class ObjectIndexCache implements ObjectIndex {
}
// index0 should be sorted
// sort index1 to enable working of the merge iterator
index1.sort();
//index1.sort();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
CloneableIterator<Row.Entry> k0 = index0.rows(up, firstKey);
CloneableIterator<Row.Entry> k1 = index1.rows(up, firstKey);
if (k0 == null) return k1;
if (k1 == null) return k0;
return new MergeIterator<Row.Entry>(
index0.rows(up, firstKey),
index1.rows(up, firstKey),
k0,
k1,
entryComparator,
MergeIterator.simpleMerge,
true);
}
public synchronized CloneableIterator<Row.Entry> rows() throws IOException {
public synchronized CloneableIterator<Row.Entry> rows() {
// returns the row-iterator of the underlying kelondroIndex
if (index1 == null) {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new RowSet(rowdef, 0);
index1 = new RowSetArray(rowdef, 0, spread);
return index0.rows();
}
assert (index1 != null);
@ -276,7 +282,7 @@ public class ObjectIndexCache implements ObjectIndex {
}
// index0 should be sorted
// sort index1 to enable working of the merge iterator
index1.sort();
//index1.sort();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new StackIterator<Row.Entry>(index0.rows(), index1.rows());
}

@ -0,0 +1,205 @@
// RowSetArray.java
// --------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://yacy.net
// Frankfurt, Germany, 2009
// last major change: 12.03.2009
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.index;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import de.anomic.kelondro.index.Row.Entry;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.StackIterator;
/**
 * An in-memory index that distributes its rows over a fixed-size array of
 * {@link RowSet} partitions.
 *
 * <p>The partition for a row is chosen from the cardinal value of its primary
 * key (see {@link #indexFor(byte[])}). Partitions are created on first write
 * access and are dropped again by {@link #clear()}, {@link #removeDoubles()}
 * and {@link #removeOne()} once they are empty.
 *
 * <p>Structural changes of the partition array are guarded by a lock on the
 * array itself. The read-only lookups {@link #get(byte[])} and
 * {@link #has(byte[])} read the array without taking that lock.
 */
public class RowSetArray implements ObjectIndex, Iterable<Row.Entry> {

    /** initial capacity handed to every newly created partition */
    private final int objectCount;
    private final Row rowdef;
    /** the partitions; a slot is null until a row is written to it */
    private final RowSet[] array;

    /**
     * @param rowdef      row definition shared by all partitions
     * @param objectCount expected total number of rows; it is divided evenly
     *                    over the partitions to size each one
     * @param arraySize   number of partitions, must be positive
     * @throws IllegalArgumentException if arraySize is not positive
     */
    public RowSetArray(final Row rowdef, final int objectCount, final int arraySize) {
        if (arraySize <= 0) {
            // fail with a clear message instead of a division by zero or a negative array size
            throw new IllegalArgumentException("arraySize must be positive, but is " + arraySize);
        }
        // a fresh object array is already filled with null, no explicit initialization needed
        this.array = new RowSet[arraySize];
        this.rowdef = rowdef;
        this.objectCount = objectCount / arraySize;
    }

    /**
     * Computes the partition number for a key.
     *
     * @param key the primary key bytes
     * @return an index in the range [0, array.length)
     */
    private int indexFor(byte[] key) {
        final long len = this.array.length;
        long slot = this.rowdef.objectOrder.cardinal(key) % len;
        // '%' keeps the sign of the dividend; shift a negative remainder into the valid range
        if (slot < 0) slot += len;
        return (int) slot;
    }

    private int indexFor(Entry row) {
        return indexFor(row.getPrimaryKeyBytes());
    }

    /**
     * Returns the partition at the given position and creates it if it does
     * not exist yet.
     *
     * @param i partition number
     * @return the partition, never null
     */
    private RowSet accessArray(int i) {
        // the null test must happen while holding the lock: if it is done outside,
        // two threads can both see null, both create a RowSet, and the rows written
        // to the first one are lost when the second one overwrites the slot
        synchronized (this.array) {
            RowSet r = this.array[i];
            if (r == null) {
                r = new RowSet(this.rowdef, this.objectCount);
                this.array[i] = r;
            }
            return r;
        }
    }

    public void addUnique(Entry row) {
        accessArray(indexFor(row)).addUnique(row);
    }

    public void addUnique(List<Entry> rows) {
        for (Entry row: rows) addUnique(row);
    }

    /** Clears every existing partition and releases it. */
    public void clear() {
        synchronized (this.array) {
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] != null) this.array[i].clear();
                this.array[i] = null;
            }
        }
    }

    public void close() {
        clear();
    }

    public void deleteOnExit() {
        // do nothing here
    }

    public String filename() {
        // we don't have a file name
        return null;
    }

    /**
     * @return the row stored for the key, or null if the responsible
     *         partition does not exist
     */
    public Entry get(byte[] key) {
        final RowSet r = this.array[indexFor(key)];
        if (r == null) return null;
        return r.get(key);
    }

    /** @return false if the responsible partition does not exist */
    public boolean has(byte[] key) {
        final RowSet r = this.array[indexFor(key)];
        if (r == null) return false;
        return r.has(key);
    }

    /**
     * Sorts every existing partition and merges their key iterators into one
     * ordered enumeration.
     */
    public CloneableIterator<byte[]> keys(boolean up, byte[] firstKey) {
        synchronized (this.array) {
            Collection<CloneableIterator<byte[]>> col = new ArrayList<CloneableIterator<byte[]>>();
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] == null) continue;
                this.array[i].sort();
                final CloneableIterator<byte[]> k = this.array[i].keys(up, firstKey);
                // never hand a null iterator to the merge
                if (k != null) col.add(k);
            }
            // NOTE(review): callers of keys() are null-checking the result, so cascade()
            // presumably returns null for an empty collection - confirm in MergeIterator
            return MergeIterator.cascade(col, this.rowdef.objectOrder, MergeIterator.simpleMerge, up);
        }
    }

    public void put(Entry row) {
        accessArray(indexFor(row)).put(row);
    }

    public void put(List<Entry> rows) {
        for (Entry row: rows) put(row);
    }

    /**
     * Removes the row with the given key.
     *
     * @return what the responsible partition returns for the removal, or null
     *         if that partition does not exist
     */
    public Entry remove(byte[] key) {
        final RowSet r = this.array[indexFor(key)];
        // a missing partition cannot contain the key; do not allocate one just to remove from it
        if (r == null) return null;
        return r.remove(key);
    }

    /**
     * Calls removeDoubles() on every existing partition, collects all results
     * and releases partitions that are empty afterwards.
     */
    public ArrayList<RowCollection> removeDoubles() {
        ArrayList<RowCollection> col = new ArrayList<RowCollection>();
        synchronized (this.array) {
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] != null) {
                    col.addAll(this.array[i].removeDoubles());
                    if (this.array[i].size() == 0) this.array[i] = null;
                }
            }
        }
        return col;
    }

    /**
     * Removes one row from the first partition that can deliver one.
     *
     * @return the removed row, or null if no partition delivered a row
     */
    public Entry removeOne() {
        synchronized (this.array) {
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] == null) continue;
                final Entry entry = this.array[i].removeOne();
                if (this.array[i].size() == 0) this.array[i] = null;
                // a partition can exist but be empty (e.g. after remove()); in that case
                // keep searching instead of reporting "nothing left" too early
                // (assumes RowSet.removeOne() returns null when it is empty - TODO confirm)
                if (entry != null) return entry;
            }
        }
        return null;
    }

    public Entry replace(Entry row) {
        return accessArray(indexFor(row)).replace(row);
    }

    public Row row() {
        return this.rowdef;
    }

    /**
     * Sorts every existing partition and returns their row iterators stacked
     * one after another.
     */
    public CloneableIterator<Entry> rows(boolean up, byte[] firstKey) {
        synchronized (this.array) {
            Collection<CloneableIterator<Entry>> col = new ArrayList<CloneableIterator<Entry>>();
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] == null) continue;
                this.array[i].sort();
                final CloneableIterator<Entry> r = this.array[i].rows(up, firstKey);
                if (r != null) col.add(r);
            }
            // NOTE(review): unlike keys(), the partitions are stacked here, not merged, so the
            // result looks ordered only within each partition - verify that callers which
            // feed this into a MergeIterator can cope with that
            return StackIterator.stack(col);
        }
    }

    public CloneableIterator<Entry> rows() {
        return rows(true, null);
    }

    /** @return the sum of the sizes of all existing partitions */
    public int size() {
        int c = 0;
        synchronized (this.array) {
            for (int i = 0; i < this.array.length; i++) {
                if (this.array[i] != null) {
                    c += this.array[i].size();
                }
            }
        }
        return c;
    }

    public Iterator<Entry> iterator() {
        return this.rows(true, null);
    }

    public long inc(byte[] key, int col, long add, Entry initrow) {
        return accessArray(indexFor(key)).inc(key, col, add, initrow);
    }
}

@ -45,6 +45,8 @@ public class MergeIterator<E> implements CloneableIterator<E> {
final Method m,
final boolean up) {
// this works currently only for String-type key iterations
assert a != null;
assert b != null;
this.a = a;
this.b = b;
this.up = up;
@ -55,6 +57,8 @@ public class MergeIterator<E> implements CloneableIterator<E> {
}
public MergeIterator<E> clone(final Object modifier) {
assert a != null;
assert b != null;
return new MergeIterator<E>(a.clone(modifier), b.clone(modifier), comp, merger, up);
}

@ -389,8 +389,10 @@ public class SplitTable implements ObjectIndex {
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(tables.size());
final Iterator<ObjectIndex> i = tables.values().iterator();
CloneableIterator<byte[]> k;
while (i.hasNext()) {
c.add(i.next().keys(up, firstKey));
k = i.next().keys(up, firstKey);
if (k != null) c.add(k);
}
return MergeIterator.cascade(c, rowdef.objectOrder, MergeIterator.simpleMerge, up);
}

@ -640,7 +640,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
ArrayList<String> l = new ArrayList<String>();
CloneableIterator<byte[]> i = this.urlIndexFile.keys(true, null);
String hash;
while (i.hasNext()) {
while (i != null && i.hasNext()) {
hash = new String(i.next());
if (hosthash.equals(hash.substring(6))) l.add(hash);
}

Loading…
Cancel
Save