// Patch summary: flattened unified diff over seven Java files. It introduces RowSetArray, a
// bucketed replacement for the single RowSet used as ObjectIndexCache.index1, and makes several
// callers tolerate a null key/row iterator.
//
// NOTE(review): this text has lost its line breaks and what look like generic type arguments
// (e.g. "Collection> col = new ArrayList>()"). It documents the change but will presumably not
// apply as-is -- regenerate it from the repository before applying.
//
// Balancer.java, MetadataRepository.java
//   The loops over urlFileIndex.keys(true, null) / urlIndexFile.keys(true, null) now test
//   "i != null" before calling i.hasNext().
// IntegerHandleIndex.java
//   The measurement code that prints memory and speed factors uses a TreeMap instead of a
//   HashMap, and one printed label changes.
// ObjectIndexCache.java
//   index1 becomes a RowSetArray created as new RowSetArray(rowdef, 0, spread), spread = 1000.
//   The explicit index1.sort() calls are removed or commented out; RowSetArray.keys/rows sort
//   each bucket themselves. keys(up, firstKey) and rows(up, firstKey) fetch both iterators first
//   and return the other one when either is null, instead of passing null to MergeIterator.
//   rows() no longer declares IOException.
// RowSetArray.java (new file)
//   Holds arraySize RowSet buckets; the bucket for a key is
//   rowdef.objectOrder.cardinal(key) % array.length.
//   accessArray(i) returns bucket i, creating it as new RowSet(rowdef, objectCount / arraySize)
//   when the slot is empty. In this revision the lookup and the creation both happen while
//   holding the lock on the bucket array, so two threads cannot each create a bucket for the
//   same slot and lose the rows written to the one that gets overwritten.
//   get/has return null/false when the bucket does not exist. keys(up, firstKey) sorts every
//   existing bucket and combines the per-bucket iterators with MergeIterator.cascade;
//   rows(up, firstKey) sorts every existing bucket and combines them with StackIterator.stack.
//   removeDoubles and removeOne drop a bucket once its size is 0; size() sums the bucket sizes.
//   NOTE(review): arraySize == 0 makes "objectCount / arraySize" and "% array.length" throw
//   ArithmeticException; the only caller visible here passes 1000.
//   NOTE(review): if objectOrder.cardinal(key) can be negative, indexFor yields a negative
//   index -- confirm cardinal's range.
//   NOTE(review): remove() and replace() go through accessArray and therefore allocate a bucket
//   even when the key was never stored.
//   NOTE(review): rows(up, firstKey) uses StackIterator.stack while ObjectIndexCache passes the
//   result to a MergeIterator whose nearby comment expects sorted input -- confirm that the
//   stacked per-bucket iterators are globally ordered, or merge them like keys() does.
//   NOTE(review): get/has and the users of accessArray's result still work on a bucket outside
//   the lock; clear/removeOne/removeDoubles may detach it concurrently. ObjectIndexCache's
//   row/key methods shown here are synchronized, which presumably covers this use -- verify.
// MergeIterator.java
//   The constructor and clone(modifier) assert that both source iterators are non-null.
// SplitTable.java
//   keys(up, firstKey) only adds a table's key iterator to the cascade list when it is non-null.
diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index d3c340a18..2503d947b 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -85,7 +85,7 @@ public class Balancer { try { final Iterator i = urlFileIndex.keys(true, null); byte[] hash; - while (i.hasNext()) { + while (i != null && i.hasNext()) { hash = i.next(); pushHashToDomainStacks(new String(hash), true); } diff --git a/source/de/anomic/kelondro/index/IntegerHandleIndex.java b/source/de/anomic/kelondro/index/IntegerHandleIndex.java index befcf53f2..ba18264a0 100644 --- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java +++ b/source/de/anomic/kelondro/index/IntegerHandleIndex.java @@ -33,9 +33,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; -import java.util.HashMap; import java.util.Iterator; import java.util.Random; +import java.util.TreeMap; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -350,7 +350,7 @@ public class IntegerHandleIndex { Integer d; System.gc(); // for resource measurement a = MemoryControl.available(); - HashMap hm = new HashMap(0); + TreeMap hm = new TreeMap(); for (int i = 0; i < count; i++) { hash = FlatWordPartitionScheme.positionToHash(r.nextInt(count)); d = hm.get(hash); @@ -364,7 +364,7 @@ public class IntegerHandleIndex { System.out.println("Used Memory: " + memj + " bytes"); System.out.println("x " + hm.get(FlatWordPartitionScheme.positionToHash(0))); System.out.println("Geschwindigkeitsfaktor j/k: " + (timej / timek)); - System.out.println("Speicherfaktor j/k: " + (memj / memk)); + System.out.println("Speicherplatzfaktor j/k: " + (memj / memk)); System.exit(0); } diff --git a/source/de/anomic/kelondro/index/ObjectIndexCache.java b/source/de/anomic/kelondro/index/ObjectIndexCache.java index da431467c..2a992889f 100644 --- 
a/source/de/anomic/kelondro/index/ObjectIndexCache.java +++ b/source/de/anomic/kelondro/index/ObjectIndexCache.java @@ -24,7 +24,6 @@ package de.anomic.kelondro.index; -import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -36,9 +35,10 @@ import de.anomic.kelondro.order.StackIterator; public class ObjectIndexCache implements ObjectIndex { + private static final int spread = 1000; private final Row rowdef; private RowSet index0; - private RowSet index1; + private RowSetArray index1; private final Row.EntryComparator entryComparator; public ObjectIndexCache(final Row rowdef, final int initialspace) { @@ -66,7 +66,7 @@ public class ObjectIndexCache implements ObjectIndex { // finish initialization phase index0.sort(); index0.uniq(); - index1 = new RowSet(rowdef, 0); + index1 = new RowSetArray(rowdef, 0, spread); } } @@ -166,7 +166,6 @@ public class ObjectIndexCache implements ObjectIndex { if (index1 == null) { return index0.removeDoubles(); } - index1.sort(); ArrayList d0 = index0.removeDoubles(); ArrayList d1 = index1.removeDoubles(); d0.addAll(d1); @@ -214,7 +213,7 @@ public class ObjectIndexCache implements ObjectIndex { // finish initialization phase index0.sort(); index0.uniq(); - index1 = new RowSet(rowdef, 0); + index1 = new RowSetArray(rowdef, 0, spread); return index0.keys(up, firstKey); } assert (index1 != null); @@ -224,11 +223,14 @@ public class ObjectIndexCache implements ObjectIndex { } // index0 should be sorted // sort index1 to enable working of the merge iterator - index1.sort(); //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + CloneableIterator k0 = index0.keys(up, firstKey); + CloneableIterator k1 = index1.keys(up, firstKey); + if (k0 == null) return k1; + if (k1 == null) return k0; return new MergeIterator( - index0.keys(up, firstKey), - index1.keys(up, firstKey), + k0, + k1, rowdef.objectOrder, MergeIterator.simpleMerge, true); @@ -240,7 +242,7 @@ public 
class ObjectIndexCache implements ObjectIndex { // finish initialization phase index0.sort(); index0.uniq(); - index1 = new RowSet(rowdef, 0); + index1 = new RowSetArray(rowdef, 0, spread); return index0.rows(up, firstKey); } assert (index1 != null); @@ -250,23 +252,27 @@ public class ObjectIndexCache implements ObjectIndex { } // index0 should be sorted // sort index1 to enable working of the merge iterator - index1.sort(); + //index1.sort(); //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + CloneableIterator k0 = index0.rows(up, firstKey); + CloneableIterator k1 = index1.rows(up, firstKey); + if (k0 == null) return k1; + if (k1 == null) return k0; return new MergeIterator( - index0.rows(up, firstKey), - index1.rows(up, firstKey), + k0, + k1, entryComparator, MergeIterator.simpleMerge, true); } - public synchronized CloneableIterator rows() throws IOException { + public synchronized CloneableIterator rows() { // returns the row-iterator of the underlying kelondroIndex if (index1 == null) { // finish initialization phase index0.sort(); index0.uniq(); - index1 = new RowSet(rowdef, 0); + index1 = new RowSetArray(rowdef, 0, spread); return index0.rows(); } assert (index1 != null); @@ -276,7 +282,7 @@ public class ObjectIndexCache implements ObjectIndex { } // index0 should be sorted // sort index1 to enable working of the merge iterator - index1.sort(); + //index1.sort(); //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return new StackIterator(index0.rows(), index1.rows()); } diff --git a/source/de/anomic/kelondro/index/RowSetArray.java b/source/de/anomic/kelondro/index/RowSetArray.java new file mode 100644 index 000000000..37da24e73 --- /dev/null +++ b/source/de/anomic/kelondro/index/RowSetArray.java @@ -0,0 +1,205 @@ +// RowSetArray.java +// -------------------------- +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://yacy.net +// Frankfurt, Germany, 2009 +// last major 
change: 12.03.2009 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro.index; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import de.anomic.kelondro.index.Row.Entry; +import de.anomic.kelondro.order.CloneableIterator; +import de.anomic.kelondro.order.MergeIterator; +import de.anomic.kelondro.order.StackIterator; + +public class RowSetArray implements ObjectIndex, Iterable { + + private final int objectCount; + private final Row rowdef; + private final RowSet[] array; + + public RowSetArray(final Row rowdef, final int objectCount, final int arraySize) { + this.array = new RowSet[arraySize]; + for (int i = 0; i < arraySize; i++) { + this.array[i] = null; + } + this.rowdef = rowdef; + this.objectCount = objectCount / arraySize; + } + + private int indexFor(byte[] key) { + return (int) (this.rowdef.objectOrder.cardinal(key) % ((long) array.length)); + } + + private int indexFor(Entry row) { + return indexFor(row.getPrimaryKeyBytes()); + } + + private RowSet accessArray(int i) { + synchronized (this.array) { + RowSet r = this.array[i]; + if (r == null) r = new RowSet(this.rowdef, this.objectCount); + this.array[i] = r; + return r; + } + } + + public void addUnique(Entry row) { + 
accessArray(indexFor(row)).addUnique(row); + } + + public void addUnique(List rows) { + for (Entry row: rows) addUnique(row); + } + + public void clear() { + synchronized (this.array) { + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) this.array[i].clear(); + this.array[i] = null; + } + } + } + + public void close() { + clear(); + } + + public void deleteOnExit() { + // do nothing here + } + + public String filename() { + // we don't have a file name + return null; + } + + public Entry get(byte[] key) { + int i = indexFor(key); + RowSet r = this.array[i]; + if (r == null) return null; + return r.get(key); + } + + public boolean has(byte[] key) { + int i = indexFor(key); + RowSet r = this.array[i]; + if (r == null) return false; + return r.has(key); + } + + public CloneableIterator keys(boolean up, byte[] firstKey) { + synchronized (this.array) { + Collection> col = new ArrayList>(); + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + this.array[i].sort(); + col.add(this.array[i].keys(up, firstKey)); + } + } + return MergeIterator.cascade(col, this.rowdef.objectOrder, MergeIterator.simpleMerge, up); + } + } + + public void put(Entry row) { + accessArray(indexFor(row)).put(row); + } + + public void put(List rows) { + for (Entry row: rows) put(row); + } + + public Entry remove(byte[] key) { + return accessArray(indexFor(key)).remove(key); + } + + public ArrayList removeDoubles() { + ArrayList col = new ArrayList(); + synchronized (this.array) { + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + col.addAll(this.array[i].removeDoubles()); + if (this.array[i].size() == 0) this.array[i] = null; + } + } + } + return col; + } + + public Entry removeOne() { + synchronized (this.array) { + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + Entry entry = this.array[i].removeOne(); + if (this.array[i].size() == 0) this.array[i] = null; + return entry; + } + } + } + 
return null; + } + + public Entry replace(Entry row) { + return accessArray(indexFor(row)).replace(row); + } + + public Row row() { + return this.rowdef; + } + + public CloneableIterator rows(boolean up, byte[] firstKey) { + synchronized (this.array) { + Collection> col = new ArrayList>(); + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + this.array[i].sort(); + col.add(this.array[i].rows(up, firstKey)); + } + } + return StackIterator.stack(col); + } + } + + public CloneableIterator rows() { + return rows(true, null); + } + + public int size() { + int c = 0; + synchronized (this.array) { + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + c += this.array[i].size(); + } + } + } + return c; + } + + public Iterator iterator() { + return this.rows(true, null); + } + + public long inc(byte[] key, int col, long add, Entry initrow) { + return accessArray(indexFor(key)).inc(key, col, add, initrow); + } +} diff --git a/source/de/anomic/kelondro/order/MergeIterator.java b/source/de/anomic/kelondro/order/MergeIterator.java index bf9399662..1b99f0aaf 100644 --- a/source/de/anomic/kelondro/order/MergeIterator.java +++ b/source/de/anomic/kelondro/order/MergeIterator.java @@ -45,6 +45,8 @@ public class MergeIterator implements CloneableIterator { final Method m, final boolean up) { // this works currently only for String-type key iterations + assert a != null; + assert b != null; this.a = a; this.b = b; this.up = up; @@ -55,6 +57,8 @@ public class MergeIterator implements CloneableIterator { } public MergeIterator clone(final Object modifier) { + assert a != null; + assert b != null; return new MergeIterator(a.clone(modifier), b.clone(modifier), comp, merger, up); } diff --git a/source/de/anomic/kelondro/table/SplitTable.java b/source/de/anomic/kelondro/table/SplitTable.java index fdc072cec..ea94617c4 100644 --- a/source/de/anomic/kelondro/table/SplitTable.java +++ b/source/de/anomic/kelondro/table/SplitTable.java @@ 
-389,8 +389,10 @@ public class SplitTable implements ObjectIndex { public synchronized CloneableIterator keys(final boolean up, final byte[] firstKey) throws IOException { final List> c = new ArrayList>(tables.size()); final Iterator i = tables.values().iterator(); + CloneableIterator k; while (i.hasNext()) { - c.add(i.next().keys(up, firstKey)); + k = i.next().keys(up, firstKey); + if (k != null) c.add(k); } return MergeIterator.cascade(c, rowdef.objectOrder, MergeIterator.simpleMerge, up); } diff --git a/source/de/anomic/kelondro/text/MetadataRepository.java b/source/de/anomic/kelondro/text/MetadataRepository.java index fc8baa9a4..bf616dd0f 100644 --- a/source/de/anomic/kelondro/text/MetadataRepository.java +++ b/source/de/anomic/kelondro/text/MetadataRepository.java @@ -640,7 +640,7 @@ public final class MetadataRepository implements Iterable { ArrayList l = new ArrayList(); CloneableIterator i = this.urlIndexFile.keys(true, null); String hash; - while (i != null && i.hasNext()) { + while (i != null && i.hasNext()) { hash = new String(i.next()); if (hosthash.equals(hash.substring(6))) l.add(hash); }