You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1127 lines
49 KiB
1127 lines
49 KiB
// RowCollection.java
|
|
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
|
// first published 12.01.2006 on http://www.anomic.de
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// LICENSE
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
package net.yacy.kelondro.index;
|
|
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.Serializable;
|
|
import java.util.ArrayList;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Random;
|
|
import java.util.concurrent.Callable;
|
|
|
|
import net.yacy.cora.document.ASCII;
|
|
import net.yacy.cora.document.UTF8;
|
|
import net.yacy.cora.order.ByteOrder;
|
|
import net.yacy.cora.sorting.Array;
|
|
import net.yacy.cora.sorting.Sortable;
|
|
import net.yacy.kelondro.index.Row.Entry;
|
|
import net.yacy.kelondro.logging.Log;
|
|
import net.yacy.kelondro.order.Base64Order;
|
|
import net.yacy.kelondro.order.NaturalOrder;
|
|
import net.yacy.kelondro.util.FileUtils;
|
|
import net.yacy.kelondro.util.MemoryControl;
|
|
import net.yacy.kelondro.util.kelondroException;
|
|
|
|
|
|
public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>, Cloneable, Serializable {
|
|
|
|
private static final long serialVersionUID=-4670634138825982705L;
|
|
private static final byte[] EMPTY_CACHE = new byte[0];
|
|
|
|
public static final long growfactorLarge100 = 140L;
|
|
public static final long growfactorSmall100 = 120L;
|
|
private static final int isortlimit = 20;
|
|
|
|
private static final int exp_chunkcount = 0;
|
|
private static final int exp_last_read = 1;
|
|
private static final int exp_last_wrote = 2;
|
|
private static final int exp_order_type = 3;
|
|
private static final int exp_order_bound = 4;
|
|
private static final int exp_collection = 5;
|
|
|
|
protected final Row rowdef;
|
|
protected byte[] chunkcache;
|
|
protected int chunkcount;
|
|
protected int sortBound;
|
|
protected long lastTimeWrote;
|
|
|
|
protected RowCollection(final RowCollection rc) {
|
|
this.rowdef = rc.rowdef;
|
|
this.chunkcache = rc.chunkcache;
|
|
this.chunkcount = rc.chunkcount;
|
|
this.sortBound = rc.sortBound;
|
|
this.lastTimeWrote = rc.lastTimeWrote;
|
|
}
|
|
|
|
protected RowCollection(final Row rowdef) {
|
|
this.rowdef = rowdef;
|
|
this.sortBound = 0;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
this.chunkcache = EMPTY_CACHE;
|
|
this.chunkcount = 0;
|
|
}
|
|
|
|
public RowCollection(final Row rowdef, final int objectCount) throws RowSpaceExceededException {
|
|
this(rowdef);
|
|
ensureSize(objectCount);
|
|
}
|
|
|
|
protected RowCollection(final Row rowdef, final int objectCount, final byte[] cache, final int sortBound) {
|
|
this.rowdef = rowdef;
|
|
this.chunkcache = cache;
|
|
this.chunkcount = objectCount;
|
|
this.sortBound = sortBound;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
protected RowCollection(final Row rowdef, final Row.Entry exportedCollectionRowEnvironment) {
|
|
final int chunkcachelength = exportedCollectionRowEnvironment.cellwidth(1) - (int) exportOverheadSize;
|
|
final Row.Entry exportedCollection = exportRow(chunkcachelength).newEntry(exportedCollectionRowEnvironment, 1);
|
|
|
|
this.rowdef = rowdef;
|
|
this.chunkcount = (int) exportedCollection.getColLong(exp_chunkcount);
|
|
if ((this.chunkcount > chunkcachelength / rowdef.objectsize)) {
|
|
Log.logWarning("RowCollection", "corrected wrong chunkcount; chunkcount = " + this.chunkcount + ", chunkcachelength = " + chunkcachelength + ", rowdef.objectsize = " + rowdef.objectsize);
|
|
this.chunkcount = chunkcachelength / rowdef.objectsize; // patch problem
|
|
}
|
|
this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day;
|
|
final String sortOrderKey = exportedCollection.getColASCII(exp_order_type);
|
|
ByteOrder oldOrder = null;
|
|
if ((sortOrderKey == null) || (sortOrderKey.equals("__"))) {
|
|
oldOrder = null;
|
|
} else {
|
|
oldOrder = NaturalOrder.bySignature(sortOrderKey);
|
|
if (oldOrder == null) oldOrder = Base64Order.bySignature(sortOrderKey);
|
|
}
|
|
if ((rowdef.objectOrder != null) && (oldOrder != null) && (!(rowdef.objectOrder.signature().equals(oldOrder.signature()))))
|
|
throw new kelondroException("old collection order does not match with new order; objectOrder.signature = " + rowdef.objectOrder.signature() + ", oldOrder.signature = " + oldOrder.signature());
|
|
this.sortBound = (int) exportedCollection.getColLong(exp_order_bound);
|
|
if (this.sortBound > this.chunkcount) {
|
|
Log.logWarning("RowCollection", "corrected wrong sortBound; sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount);
|
|
this.sortBound = this.chunkcount;
|
|
}
|
|
this.chunkcache = exportedCollection.getColBytes(exp_collection, false);
|
|
}
|
|
|
|
protected RowCollection(final Row rowdef, final byte[] chunkcache, final int chunkcount, final int sortBound, final long lastTimeWrote) {
|
|
this.rowdef = rowdef;
|
|
this.chunkcache = new byte[chunkcache.length];
|
|
System.arraycopy(chunkcache, 0, this.chunkcache, 0, chunkcache.length);
|
|
this.chunkcount = chunkcount;
|
|
this.sortBound = sortBound;
|
|
this.lastTimeWrote = lastTimeWrote;
|
|
}
|
|
|
|
@Override
|
|
public RowCollection clone() {
|
|
return new RowCollection(this.rowdef, this.chunkcache, this.chunkcount, this.sortBound, this.lastTimeWrote);
|
|
}
|
|
|
|
public void reset() {
|
|
this.chunkcache = new byte[0];
|
|
this.chunkcount = 0;
|
|
this.sortBound = 0;
|
|
}
|
|
|
|
/**
|
|
* calculate the memory that the structure occupies in ram
|
|
* @return number of bytes in use
|
|
*/
|
|
public long mem() {
|
|
return this.chunkcache.length;
|
|
}
|
|
|
|
private static final Row exportMeasureRow = exportRow(0 /* no relevance */);
|
|
|
|
public static final int sizeOfExportedCollectionRows(final Row.Entry exportedCollectionRowEnvironment, final int columnInEnvironment) {
|
|
final Row.Entry exportedCollectionEntry = exportMeasureRow.newEntry(exportedCollectionRowEnvironment, columnInEnvironment);
|
|
final int chunkcount = (int) exportedCollectionEntry.getColLong(exp_chunkcount);
|
|
return chunkcount;
|
|
}
|
|
|
|
private static final long day = 1000 * 60 * 60 * 24;
|
|
|
|
private static int daysSince2000(final long time) {
|
|
return (int) (time / day) - 10957;
|
|
}
|
|
|
|
private static Column exportColumn0, exportColumn1, exportColumn2, exportColumn3, exportColumn4, collectionColumnProducer;
|
|
|
|
protected static final long exportOverheadSize = 14;
|
|
|
|
private static Row exportRow(final int chunkcachelength) {
|
|
/*
|
|
return new Row(
|
|
"int size-4 {b256}," +
|
|
"short lastread-2 {b256}," + // as daysSince2000
|
|
"short lastwrote-2 {b256}," + // as daysSince2000
|
|
"byte[] orderkey-2," +
|
|
"int orderbound-4 {b256}," +
|
|
"byte[] collection-" + chunkcachelength,
|
|
NaturalOrder.naturalOrder
|
|
);
|
|
*/
|
|
|
|
if (exportColumn0 == null) exportColumn0 = new Column("int size-4 {b256}");
|
|
if (exportColumn1 == null) exportColumn1 = new Column("short lastread-2 {b256}");
|
|
if (exportColumn2 == null) exportColumn2 = new Column("short lastwrote-2 {b256}");
|
|
if (exportColumn3 == null) exportColumn3 = new Column("byte[] orderkey-2");
|
|
if (exportColumn4 == null) exportColumn4 = new Column("int orderbound-4 {b256}");
|
|
if (collectionColumnProducer == null) collectionColumnProducer = new Column("byte[] collection-1");
|
|
/*
|
|
* because of a strange bug these objects cannot be initialized as normal
|
|
* static final. If I try that, they are not initialized and are assigned null. why?
|
|
*/
|
|
|
|
Column collectionColumn = (Column) collectionColumnProducer.clone();
|
|
collectionColumn.setCellwidth(chunkcachelength);
|
|
final Row er = new Row(new Column[]{
|
|
exportColumn0, exportColumn1, exportColumn2, exportColumn3, exportColumn4,
|
|
collectionColumn
|
|
},
|
|
NaturalOrder.naturalOrder
|
|
);
|
|
assert er.objectsize == chunkcachelength + exportOverheadSize;
|
|
return er;
|
|
}
|
|
|
|
public synchronized byte[] exportCollection() {
|
|
// returns null if the collection is empty
|
|
sort(); // experimental; supervise CPU load
|
|
//uniq();
|
|
//trim();
|
|
assert this.sortBound == this.chunkcount; // on case the collection is sorted
|
|
assert size() * this.rowdef.objectsize <= this.chunkcache.length : "this.size() = " + size() + ", objectsize = " + this.rowdef.objectsize + ", chunkcache.length = " + this.chunkcache.length;
|
|
final Row row = exportRow(size() * this.rowdef.objectsize);
|
|
final Row.Entry entry = row.newEntry();
|
|
assert (this.sortBound <= this.chunkcount) : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
|
|
assert (this.chunkcount <= this.chunkcache.length / this.rowdef.objectsize) : "chunkcount = " + this.chunkcount + ", chunkcache.length = " + this.chunkcache.length + ", rowdef.objectsize = " + this.rowdef.objectsize;
|
|
entry.setCol(exp_chunkcount, this.chunkcount);
|
|
entry.setCol(exp_last_read, daysSince2000(System.currentTimeMillis()));
|
|
entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote));
|
|
entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? ASCII.getBytes("__") : UTF8.getBytes(this.rowdef.objectOrder.signature()));
|
|
entry.setCol(exp_order_bound, this.sortBound);
|
|
entry.setCol(exp_collection, this.chunkcache);
|
|
return entry.bytes();
|
|
}
|
|
|
|
public void saveCollection(final File file) throws IOException {
|
|
FileUtils.copy(exportCollection(), file);
|
|
}
|
|
|
|
public Row row() {
|
|
return this.rowdef;
|
|
}
|
|
|
|
private final long neededSpaceForEnsuredSize(final int elements, final boolean forcegc) {
|
|
assert elements > 0 : "elements = " + elements;
|
|
final long needed = elements * this.rowdef.objectsize;
|
|
if (this.chunkcache.length >= needed) return 0;
|
|
assert needed > 0 : "needed = " + needed;
|
|
long allocram = needed * growfactorLarge100 / 100L;
|
|
allocram -= allocram % this.rowdef.objectsize;
|
|
assert allocram > 0 : "elements = " + elements + ", new = " + allocram;
|
|
if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, false)) return allocram;
|
|
allocram = needed * growfactorSmall100 / 100L;
|
|
allocram -= allocram % this.rowdef.objectsize;
|
|
assert allocram >= 0 : "elements = " + elements + ", new = " + allocram;
|
|
if (allocram <= Integer.MAX_VALUE && MemoryControl.request(allocram, forcegc)) return allocram;
|
|
return needed;
|
|
}
|
|
|
|
private final void ensureSize(final int elements) throws RowSpaceExceededException {
|
|
if (elements == 0) return;
|
|
final long allocram = neededSpaceForEnsuredSize(elements, true);
|
|
if (allocram == 0) return;
|
|
assert this.chunkcache.length < elements * this.rowdef.objectsize : "wrong alloc computation (1): elements * rowdef.objectsize = " + (elements * this.rowdef.objectsize) + ", chunkcache.length = " + this.chunkcache.length;
|
|
assert allocram > this.chunkcache.length : "wrong alloc computation (2): allocram = " + allocram + ", chunkcache.length = " + this.chunkcache.length;
|
|
if (allocram > Integer.MAX_VALUE || !MemoryControl.request(allocram + 32, true))
|
|
throw new RowSpaceExceededException(allocram + 32, "RowCollection grow");
|
|
try {
|
|
final byte[] newChunkcache = new byte[(int) allocram]; // increase space
|
|
System.arraycopy(this.chunkcache, 0, newChunkcache, 0, this.chunkcache.length);
|
|
this.chunkcache = newChunkcache;
|
|
} catch (final OutOfMemoryError e) {
|
|
// lets try again after a forced gc()
|
|
System.gc();
|
|
try {
|
|
final byte[] newChunkcache = new byte[(int) allocram]; // increase space
|
|
System.arraycopy(this.chunkcache, 0, newChunkcache, 0, this.chunkcache.length);
|
|
this.chunkcache = newChunkcache;
|
|
} catch (final OutOfMemoryError ee) {
|
|
throw new RowSpaceExceededException(allocram, "RowCollection grow after OutOfMemoryError " + ee.getMessage());
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* compute the needed memory in case of a cache extension. That is, if the cache is full and must
|
|
* be copied into a new cache which is larger. In such a case the Collection needs more than the double size
|
|
* than is necessary to store the data. This method computes the extra memory that is needed to perform this task.
|
|
* @return
|
|
*/
|
|
protected final long memoryNeededForGrow() {
|
|
return neededSpaceForEnsuredSize(this.chunkcount + 1, false);
|
|
}
|
|
|
|
@Override
|
|
public int compare(final Entry o1, final Entry o2) {
|
|
return o1.compareTo(o2);
|
|
}
|
|
|
|
@Override
|
|
public Entry buffer() {
|
|
return row().newEntry();
|
|
}
|
|
|
|
@Override
|
|
public void swap(final int i, final int j, final Entry buffer) {
|
|
if (i == j) return;
|
|
final byte[] swapspace = buffer.bytes();
|
|
System.arraycopy(this.chunkcache, this.rowdef.objectsize * i, swapspace, 0, this.rowdef.objectsize);
|
|
System.arraycopy(this.chunkcache, this.rowdef.objectsize * j, this.chunkcache, this.rowdef.objectsize * i, this.rowdef.objectsize);
|
|
System.arraycopy(swapspace, 0, this.chunkcache, this.rowdef.objectsize * j, this.rowdef.objectsize);
|
|
}
|
|
|
|
protected synchronized void trim() {
|
|
if (this.chunkcache.length == 0) return;
|
|
final long needed = this.chunkcount * this.rowdef.objectsize;
|
|
assert needed <= this.chunkcache.length;
|
|
if (needed >= this.chunkcache.length)
|
|
return; // in case that the growfactor causes that the cache would
|
|
// grow instead of shrink, simply ignore the growfactor
|
|
if (MemoryControl.available() + 1000 < needed)
|
|
return; // if the swap buffer is not available, we must give up.
|
|
// This is not critical. Otherwise we provoke a serious
|
|
// problem with OOM
|
|
final byte[] newChunkcache = new byte[(int) needed];
|
|
System.arraycopy(this.chunkcache, 0, newChunkcache, 0, Math.min(this.chunkcache.length, newChunkcache.length));
|
|
this.chunkcache = newChunkcache;
|
|
}
|
|
|
|
public final long lastWrote() {
|
|
return this.lastTimeWrote;
|
|
}
|
|
|
|
protected synchronized final byte[] getKey(final int index) {
|
|
assert (index >= 0) : "get: access with index " + index + " is below zero";
|
|
assert (index < this.chunkcount) : "get: access with index " + index + " is above chunkcount " + this.chunkcount + "; sortBound = " + this.sortBound;
|
|
assert (index * this.rowdef.objectsize < this.chunkcache.length);
|
|
if ((this.chunkcache == null) || (this.rowdef == null)) return null; // case may appear during shutdown
|
|
if (index >= this.chunkcount) return null;
|
|
if ((index + 1) * this.rowdef.objectsize > this.chunkcache.length) return null; // the whole chunk does not fit into the chunkcache
|
|
final byte[] b = new byte[this.rowdef.primaryKeyLength];
|
|
System.arraycopy(this.chunkcache, index * this.rowdef.objectsize, b, 0, b.length);
|
|
return b;
|
|
}
|
|
|
|
@Override
|
|
public synchronized final Row.Entry get(final int index, final boolean clone) {
|
|
assert (index >= 0) : "get: access with index " + index + " is below zero";
|
|
assert (index < this.chunkcount) : "get: access with index " + index + " is above chunkcount " + this.chunkcount + "; sortBound = " + this.sortBound;
|
|
assert (this.chunkcache != null && index * this.rowdef.objectsize < this.chunkcache.length);
|
|
assert this.sortBound <= this.chunkcount : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
|
|
if ((this.chunkcache == null) || (this.rowdef == null)) return null; // case may appear during shutdown
|
|
Row.Entry entry;
|
|
final int addr = index * this.rowdef.objectsize;
|
|
if (index >= this.chunkcount) return null;
|
|
if (addr + this.rowdef.objectsize > this.chunkcache.length) return null; // the whole chunk does not fit into the chunkcache
|
|
entry = this.rowdef.newEntry(this.chunkcache, addr, clone);
|
|
return entry;
|
|
}
|
|
|
|
public synchronized final void set(final int index, final Row.Entry a) throws RowSpaceExceededException {
|
|
assert (index >= 0) : "set: access with index " + index + " is below zero";
|
|
ensureSize(index + 1);
|
|
final byte[] column = a.bytes();
|
|
assert a.cellwidth(0) == this.rowdef.primaryKeyLength;
|
|
assert column.length >= this.rowdef.primaryKeyLength;
|
|
final boolean sameKey = match(column, 0, index);
|
|
//if (sameKey) System.out.print("$");
|
|
a.writeToArray(this.chunkcache, index * this.rowdef.objectsize);
|
|
if (index >= this.chunkcount) this.chunkcount = index + 1;
|
|
if (!sameKey && index < this.sortBound) this.sortBound = index;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
public final void insertUnique(final int index, final Row.Entry a) throws RowSpaceExceededException {
|
|
assert (a != null);
|
|
|
|
if (index < this.chunkcount) {
|
|
// make room
|
|
ensureSize(this.chunkcount + 1);
|
|
System.arraycopy(this.chunkcache, this.rowdef.objectsize * index, this.chunkcache, this.rowdef.objectsize * (index + 1), (this.chunkcount - index) * this.rowdef.objectsize);
|
|
this.chunkcount++;
|
|
}
|
|
// insert entry into gap
|
|
set(index, a);
|
|
}
|
|
|
|
public synchronized void addUnique(final Row.Entry row) throws RowSpaceExceededException {
|
|
final byte[] r = row.bytes();
|
|
addUnique(r, 0, r.length);
|
|
}
|
|
|
|
public synchronized void addUnique(final List<Row.Entry> rows) throws RowSpaceExceededException {
|
|
assert this.sortBound == 0 : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
|
|
final Iterator<Row.Entry> i = rows.iterator();
|
|
while (i.hasNext()) addUnique(i.next());
|
|
}
|
|
|
|
public synchronized void add(final byte[] a) throws RowSpaceExceededException {
|
|
assert a.length == this.rowdef.objectsize : "a.length = " + a.length + ", objectsize = " + this.rowdef.objectsize;
|
|
addUnique(a, 0, a.length);
|
|
}
|
|
|
|
private final void addUnique(final byte[] a, final int astart, final int alength) throws RowSpaceExceededException {
|
|
assert (a != null);
|
|
assert (astart >= 0) && (astart < a.length) : " astart = " + astart;
|
|
assert (!(Log.allZero(a, astart, alength))) : "a = " + NaturalOrder.arrayList(a, astart, alength);
|
|
assert (alength > 0);
|
|
assert (astart + alength <= a.length);
|
|
assert alength == this.rowdef.objectsize : "alength =" + alength + ", rowdef.objectsize = " + this.rowdef.objectsize;
|
|
final int l = Math.min(this.rowdef.objectsize, Math.min(alength, a.length - astart));
|
|
ensureSize(this.chunkcount + 1);
|
|
System.arraycopy(a, astart, this.chunkcache, this.rowdef.objectsize * this.chunkcount, l);
|
|
this.chunkcount++;
|
|
// if possible, increase the sortbound value to suppress unnecessary sorting
|
|
if (this.chunkcount == 1) {
|
|
assert this.sortBound == 0;
|
|
this.sortBound = 1;
|
|
} else if (
|
|
this.sortBound + 1 == this.chunkcount &&
|
|
this.rowdef.objectOrder.compare(this.chunkcache, this.rowdef.objectsize * (this.chunkcount - 2),
|
|
this.chunkcache, this.rowdef.objectsize * (this.chunkcount - 1), this.rowdef.primaryKeyLength) == -1) {
|
|
this.sortBound = this.chunkcount;
|
|
}
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
protected final void addSorted(final byte[] a, final int astart, final int alength) throws RowSpaceExceededException {
|
|
assert (a != null);
|
|
assert (astart >= 0) && (astart < a.length) : " astart = " + astart;
|
|
assert (!(Log.allZero(a, astart, alength))) : "a = " + NaturalOrder.arrayList(a, astart, alength);
|
|
assert (alength > 0);
|
|
assert (astart + alength <= a.length);
|
|
assert alength == this.rowdef.objectsize : "alength =" + alength + ", rowdef.objectsize = " + this.rowdef.objectsize;
|
|
final int l = Math.min(this.rowdef.objectsize, Math.min(alength, a.length - astart));
|
|
ensureSize(this.chunkcount + 1);
|
|
System.arraycopy(a, astart, this.chunkcache, this.rowdef.objectsize * this.chunkcount, l);
|
|
this.chunkcount++;
|
|
this.sortBound = this.chunkcount;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
public synchronized final void addAllUnique(final RowCollection c) throws RowSpaceExceededException {
|
|
if (c == null) return;
|
|
assert(this.rowdef.objectsize == c.rowdef.objectsize);
|
|
ensureSize(this.chunkcount + c.size());
|
|
System.arraycopy(c.chunkcache, 0, this.chunkcache, this.rowdef.objectsize * this.chunkcount, this.rowdef.objectsize * c.size());
|
|
this.chunkcount += c.size();
|
|
}
|
|
|
|
/**
|
|
* This method removes the entry at position p ensuring the order of the remaining
|
|
* entries if specified by keepOrder.
|
|
* Note: Keeping the order is expensive. If you want to remove more than one element in
|
|
* a batch with this method, it'd be better to do the removes without order keeping and doing
|
|
* the sort after all the removes are done.
|
|
*
|
|
* @param p element at this position will be removed
|
|
* @param keepOrder keep the order of remaining entries
|
|
*/
|
|
public synchronized final void removeRow(final int p, final boolean keepOrder) {
|
|
assert p >= 0 : "p = " + p;
|
|
assert p < this.chunkcount : "p = " + p + ", chunkcount = " + this.chunkcount;
|
|
assert this.chunkcount > 0 : "chunkcount = " + this.chunkcount;
|
|
assert this.sortBound <= this.chunkcount : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
|
|
if (keepOrder && (p < this.sortBound)) {
|
|
// remove by shift (quite expensive for big collections)
|
|
final int addr = p * this.rowdef.objectsize;
|
|
System.arraycopy(
|
|
this.chunkcache, addr + this.rowdef.objectsize,
|
|
this.chunkcache, addr,
|
|
(this.chunkcount - p - 1) * this.rowdef.objectsize);
|
|
this.sortBound--; // this is only correct if p < sortBound, but this was already checked above
|
|
} else {
|
|
// remove by copying the top-element to the remove position
|
|
if (p != this.chunkcount - 1) {
|
|
System.arraycopy(
|
|
this.chunkcache, (this.chunkcount - 1) * this.rowdef.objectsize,
|
|
this.chunkcache, p * this.rowdef.objectsize,
|
|
this.rowdef.objectsize);
|
|
}
|
|
// we moved the last element to the remove position: (p+1)st element
|
|
// only the first p elements keep their order (element p is already outside the order)
|
|
if (this.sortBound > p) this.sortBound = p;
|
|
}
|
|
this.chunkcount--;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
|
|
@Override
|
|
public final void delete(final int p) {
|
|
removeRow(p, true);
|
|
}
|
|
|
|
/**
|
|
* removes the last entry from the collection
|
|
* @return
|
|
*/
|
|
public synchronized Row.Entry removeOne() {
|
|
if (this.chunkcount == 0) return null;
|
|
final Row.Entry r = get(this.chunkcount - 1, true);
|
|
if (this.chunkcount == this.sortBound) this.sortBound--;
|
|
this.chunkcount--;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
return r;
|
|
}
|
|
|
|
public synchronized List<Row.Entry> top(int count) {
|
|
final ArrayList<Row.Entry> list = new ArrayList<Row.Entry>();
|
|
if (this.chunkcount == 0) return list;
|
|
Row.Entry entry;
|
|
int cursor = this.chunkcount - 1;
|
|
while (count > 0 && cursor >= 0) {
|
|
entry = get(cursor, true);
|
|
list.add(entry);
|
|
count--;
|
|
cursor--;
|
|
}
|
|
return list;
|
|
}
|
|
|
|
public synchronized byte[] smallestKey() {
|
|
if (this.chunkcount == 0) return null;
|
|
sort();
|
|
final Row.Entry r = get(0, false);
|
|
final byte[] b = r.getPrimaryKeyBytes();
|
|
return b;
|
|
}
|
|
|
|
public synchronized byte[] largestKey() {
|
|
if (this.chunkcount == 0) return null;
|
|
sort();
|
|
final Row.Entry r = get(this.chunkcount - 1, false);
|
|
final byte[] b = r.getPrimaryKeyBytes();
|
|
return b;
|
|
}
|
|
|
|
public synchronized void clear() {
|
|
if (this.chunkcache.length == 0) return;
|
|
this.chunkcache = new byte[0];
|
|
this.chunkcount = 0;
|
|
this.sortBound = 0;
|
|
this.lastTimeWrote = System.currentTimeMillis();
|
|
}
|
|
|
|
@Override
|
|
public int size() {
|
|
return this.chunkcount;
|
|
}
|
|
|
|
public boolean isEmpty() {
|
|
return this.chunkcount == 0;
|
|
}
|
|
|
|
public int sorted() {
|
|
return this.sortBound;
|
|
}
|
|
|
|
public synchronized Iterator<byte[]> keys(final boolean keepOrderWhenRemoving) {
|
|
// iterates byte[] - type entries
|
|
return new keyIterator(keepOrderWhenRemoving);
|
|
}
|
|
|
|
/**
|
|
* Iterator for kelondroRowCollection.
|
|
* It supports remove() though it doesn't contain the order of the underlying
|
|
* collection during removes.
|
|
*
|
|
*/
|
|
private class keyIterator implements Iterator<byte[]> {
|
|
|
|
private int p;
|
|
private final boolean keepOrderWhenRemoving;
|
|
|
|
private keyIterator(final boolean keepOrderWhenRemoving) {
|
|
this.p = 0;
|
|
this.keepOrderWhenRemoving = keepOrderWhenRemoving;
|
|
}
|
|
|
|
@Override
|
|
public boolean hasNext() {
|
|
return this.p < RowCollection.this.chunkcount;
|
|
}
|
|
|
|
@Override
|
|
public byte[] next() {
|
|
return getKey(this.p++);
|
|
}
|
|
|
|
@Override
|
|
public void remove() {
|
|
this.p--;
|
|
removeRow(this.p, this.keepOrderWhenRemoving);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* return an iterator for the row entries in this object
|
|
*/
|
|
@Override
|
|
public Iterator<Row.Entry> iterator() {
|
|
// iterates kelondroRow.Entry - type entries
|
|
return new rowIterator();
|
|
}
|
|
|
|
/**
|
|
* Iterator for kelondroRowCollection.
|
|
* It supports remove() and keeps the order of the underlying
|
|
* collection during removes.
|
|
*/
|
|
private class rowIterator implements Iterator<Row.Entry> {
|
|
|
|
private int p;
|
|
|
|
public rowIterator() {
|
|
this.p = 0;
|
|
}
|
|
|
|
@Override
|
|
public boolean hasNext() {
|
|
return this.p < RowCollection.this.chunkcount;
|
|
}
|
|
|
|
@Override
|
|
public Row.Entry next() {
|
|
return get(this.p++, true);
|
|
}
|
|
|
|
@Override
|
|
public void remove() {
|
|
this.p--;
|
|
removeRow(this.p, true);
|
|
}
|
|
|
|
}
|
|
|
|
public final void sort() {
|
|
if (this.sortBound == this.chunkcount) return; // this is sorted
|
|
synchronized (this) {
|
|
if (this.sortBound == this.chunkcount) return; // check again
|
|
//Log.logInfo("RowCollection.sort()", "sorting array of size " + this.chunkcount + ", sortBound = " + this.sortBound);
|
|
net.yacy.cora.sorting.Array.sort(this);
|
|
this.sortBound = this.chunkcount;
|
|
}
|
|
}
|
|
|
|
public static class partitionthread implements Callable<Integer> {
|
|
RowCollection rc;
|
|
int L, R, S;
|
|
|
|
public partitionthread(final RowCollection rc, final int L, final int R, final int S) {
|
|
this.rc = rc;
|
|
this.L = L;
|
|
this.R = R;
|
|
this.S = S;
|
|
}
|
|
|
|
@Override
|
|
public Integer call() throws Exception {
|
|
return Integer.valueOf(this.rc.partition(this.L, this.R, this.S, new byte[this.rc.rowdef.objectsize]));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param L is the first element in the sequence
|
|
* @param R is the right bound of the sequence, and outside of the sequence
|
|
* @param S is the bound of the sorted elements in the sequence
|
|
* @param swapspace
|
|
* @return
|
|
*/
|
|
final int partition(final int L, final int R, int S, final byte[] swapspace) {
|
|
assert (L < R - 1): "L = " + L + ", R = " + R + ", S = " + S;
|
|
assert (R - L >= isortlimit): "L = " + L + ", R = " + R + ", S = " + S + ", isortlimit = " + isortlimit;
|
|
|
|
int p = L;
|
|
int q = R - 1;
|
|
int pivot = pivot(L, R, S);
|
|
if (this.rowdef.objectOrder instanceof Base64Order) {
|
|
while (p <= q) {
|
|
// wenn pivot < S: pivot befindet sich in sortierter Sequenz von L bis S - 1
|
|
// d.h. alle Werte von L bis pivot sind kleiner als das pivot
|
|
// zu finden ist ein minimales p <= q so dass chunk[p] >= pivot
|
|
if ((pivot < S) && (p < pivot)) {
|
|
//System.out.println("+++ saved " + (pivot - p) + " comparisments");
|
|
p = pivot;
|
|
S = 0;
|
|
} else {
|
|
while ((p < R - 1) && (compare(pivot, p) >= 0)) p++; // chunkAt[p] < pivot
|
|
}
|
|
// nun gilt chunkAt[p] >= pivot
|
|
while ((q > L) && (compare(pivot, q) <= 0)) q--; // chunkAt[q] > pivot
|
|
if (p <= q) {
|
|
pivot = swap(p, q, pivot, swapspace);
|
|
p++;
|
|
q--;
|
|
}
|
|
}
|
|
} else {
|
|
while (p <= q) {
|
|
if ((pivot < S) && (p < pivot)) {
|
|
p = pivot;
|
|
S = 0;
|
|
} else {
|
|
while ((p < R - 1) && (compare(pivot, p) >= 0)) p++; // chunkAt[p] < pivot
|
|
}
|
|
while ((q > L) && (compare(pivot, q) <= 0)) q--; // chunkAt[q] > pivot
|
|
if (p <= q) {
|
|
pivot = swap(p, q, pivot, swapspace);
|
|
p++;
|
|
q--;
|
|
}
|
|
}
|
|
}
|
|
// now p is the beginning of the upper sequence
|
|
// finally, the pivot element should be exactly between the two sequences
|
|
// distinguish two cases: pivot in lower and upper sequence
|
|
// to do this it is sufficient to compare the index, not the entry content
|
|
if (pivot < p) {
|
|
// switch the pivot with the element _below_ p, the element in p belongs to the upper sequence
|
|
// and does not fit into the lower sequence
|
|
swap(pivot, p - 1, pivot, swapspace);
|
|
return p - 1;
|
|
} else if (pivot > p) {
|
|
// switch the pivot with p, they are both in the same sequence
|
|
swap(pivot, p, pivot, swapspace);
|
|
return p;
|
|
}
|
|
assert pivot == p;
|
|
return p;
|
|
}
|
|
|
|
private final int pivot(final int L, final int R, final int S) {
|
|
if (S == 0 || S < L) {
|
|
// the collection has no ordering
|
|
// or
|
|
// the collection has an ordering, but this is not relevant for this pivot
|
|
// because the ordered zone is outside of ordering zone
|
|
final int m = picMiddle(L, (3 * L + R - 1) / 4, (L + R - 1) / 2, (L + 3 * R - 3) / 4, R - 1);
|
|
assert L <= m;
|
|
assert m < R;
|
|
return m;
|
|
}
|
|
if (S < R) {
|
|
// the collection has an ordering
|
|
// and part of the ordered zone is inside the to-be-ordered zone
|
|
final int m = picMiddle(L, L + (S - L) / 3, (L + R - 1) / 2, S, R - 1);
|
|
assert L <= m;
|
|
assert m < R;
|
|
return m;
|
|
}
|
|
// use the sorted set to find good pivot:
|
|
// the sort range is fully inside the sorted area:
|
|
// the middle element must be the best
|
|
// (however, it should be skipped because there is no point in sorting this)
|
|
return (L + R - 1) / 2;
|
|
}
|
|
|
|
private final int picMiddle(final int a, final int b, final int c, final int d, final int e) {
|
|
return picMiddle(picMiddle(a, b, c), d, e);
|
|
}
|
|
|
|
private final int picMiddle(final int a, final int b, final int c) {
|
|
if (compare(a, b) > 0) {
|
|
if (compare(c, a) > 0) return a;
|
|
if (compare(b, c) > 0) return b;
|
|
} else {
|
|
if (compare(a, c) > 0) return a;
|
|
if (compare(c, b) > 0) return b;
|
|
}
|
|
return c;
|
|
//if (c < a && a < b || a > b && c > a) return a;
|
|
//if (a < b && c > b || c < b && a > b) return b;
|
|
}
|
|
|
|
private final int swap(final int i, final int j, final int p, final byte[] swapspace) {
|
|
if (i == j) return p;
|
|
System.arraycopy(this.chunkcache, this.rowdef.objectsize * i, swapspace, 0, this.rowdef.objectsize);
|
|
System.arraycopy(this.chunkcache, this.rowdef.objectsize * j, this.chunkcache, this.rowdef.objectsize * i, this.rowdef.objectsize);
|
|
System.arraycopy(swapspace, 0, this.chunkcache, this.rowdef.objectsize * j, this.rowdef.objectsize);
|
|
if (i == p) return j; else if (j == p) return i; else return p;
|
|
}
|
|
|
|
protected synchronized void uniq() {
|
|
Array.uniq(this);
|
|
}
|
|
|
|
public synchronized ArrayList<RowCollection> removeDoubles() throws RowSpaceExceededException {
|
|
assert (this.rowdef.objectOrder != null);
|
|
// removes double-occurrences of chunks
|
|
// in contrast to uniq() this removes also the remaining, non-double entry that had a double-occurrence to the others
|
|
// all removed chunks are returned in an array
|
|
sort();
|
|
final ArrayList<RowCollection> report = new ArrayList<RowCollection>();
|
|
if (this.chunkcount < 2) return report;
|
|
int i = this.chunkcount - 2;
|
|
boolean u = true;
|
|
RowCollection collection = new RowCollection(this.rowdef, 2);
|
|
try {
|
|
while (i >= 0) {
|
|
if (match(i, i + 1)) {
|
|
collection.addUnique(get(i + 1, false));
|
|
removeRow(i + 1, false);
|
|
if (i + 1 < this.chunkcount - 1) u = false;
|
|
} else if (!collection.isEmpty()) {
|
|
// finish collection of double occurrences
|
|
collection.addUnique(get(i + 1, false));
|
|
removeRow(i + 1, false);
|
|
if (i + 1 < this.chunkcount - 1) u = false;
|
|
collection.trim();
|
|
report.add(collection);
|
|
collection = new RowSet(this.rowdef, 2);
|
|
}
|
|
i--;
|
|
}
|
|
} catch (final RuntimeException e) {
|
|
Log.logWarning("kelondroRowCollection", e.getMessage(), e);
|
|
} finally {
|
|
if (!u) sort();
|
|
}
|
|
return report;
|
|
}
|
|
|
|
public synchronized boolean isSorted() {
|
|
assert (this.rowdef.objectOrder != null);
|
|
if (this.chunkcount <= 1) return true;
|
|
if (this.chunkcount != this.sortBound) return false;
|
|
/*
|
|
for (int i = 0; i < chunkcount - 1; i++) {
|
|
//System.out.println("*" + UTF8.String(get(i).getPrimaryKeyBytes()));
|
|
if (compare(i, i + 1) > 0) {
|
|
System.out.println("?" + UTF8.String(get(i + 1, false).getPrimaryKeyBytes()));
|
|
return false;
|
|
}
|
|
}
|
|
*/
|
|
return true;
|
|
}
|
|
|
|
@Override
|
|
public synchronized String toString() {
|
|
final StringBuilder s = new StringBuilder(80);
|
|
final Iterator<Row.Entry> i = iterator();
|
|
if (i.hasNext()) s.append(i.next().toString());
|
|
while (i.hasNext()) s.append(", " + (i.next()).toString());
|
|
return s.toString();
|
|
}
|
|
|
|
private final int compare(final int i, final int j) {
|
|
assert (this.chunkcount * this.rowdef.objectsize <= this.chunkcache.length) : "chunkcount = " + this.chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + this.chunkcache.length;
|
|
assert (i >= 0) && (i < this.chunkcount) : "i = " + i + ", chunkcount = " + this.chunkcount;
|
|
assert (j >= 0) && (j < this.chunkcount) : "j = " + j + ", chunkcount = " + this.chunkcount;
|
|
assert (this.rowdef.objectOrder != null);
|
|
if (i == j) return 0;
|
|
//assert (!bugappearance(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength));
|
|
//assert (!bugappearance(chunkcache, j * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength));
|
|
final int c = this.rowdef.objectOrder.compare(
|
|
this.chunkcache,
|
|
i * this.rowdef.objectsize,
|
|
this.chunkcache,
|
|
j * this.rowdef.objectsize,
|
|
this.rowdef.primaryKeyLength);
|
|
return c;
|
|
}
|
|
|
|
protected int compare(final byte[] a, final int astart, final int chunknumber) {
|
|
assert (chunknumber < this.chunkcount);
|
|
assert a.length - astart >= this.rowdef.primaryKeyLength;
|
|
final int len = Math.min(a.length - astart, this.rowdef.primaryKeyLength);
|
|
return this.rowdef.objectOrder.compare(a, astart, this.chunkcache, chunknumber * this.rowdef.objectsize, len);
|
|
}
|
|
|
|
protected final boolean match(final int i, final int j) {
|
|
assert (this.chunkcount * this.rowdef.objectsize <= this.chunkcache.length) : "chunkcount = " + this.chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + this.chunkcache.length;
|
|
assert (i >= 0) && (i < this.chunkcount) : "i = " + i + ", chunkcount = " + this.chunkcount;
|
|
assert (j >= 0) && (j < this.chunkcount) : "j = " + j + ", chunkcount = " + this.chunkcount;
|
|
if (i >= this.chunkcount) return false;
|
|
if (j >= this.chunkcount) return false;
|
|
assert (this.rowdef.objectOrder != null);
|
|
if (i == j) return true;
|
|
int astart = i * this.rowdef.objectsize;
|
|
int bstart = j * this.rowdef.objectsize;
|
|
int k = this.rowdef.primaryKeyLength;
|
|
while (k-- != 0) {
|
|
if (this.chunkcache[astart++] != this.chunkcache[bstart++]) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
protected boolean match(final byte[] a, int astart, final int chunknumber) {
|
|
if (chunknumber >= this.chunkcount) return false;
|
|
assert a.length - astart >= this.rowdef.primaryKeyLength;
|
|
for (int p = chunknumber * this.rowdef.objectsize,
|
|
len = Math.min(a.length - astart, this.rowdef.primaryKeyLength);
|
|
len != 0;
|
|
len--, astart++, p++) {
|
|
if (a[astart] != this.chunkcache[p]) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
public synchronized void close() {
|
|
this.chunkcache = null;
|
|
}
|
|
|
|
private static long d(final long a, final long b) {
|
|
if (b == 0) return a;
|
|
return a / b;
|
|
}
|
|
|
|
private static Random random = null;
|
|
private static String randomHash() {
|
|
return
|
|
Base64Order.enhancedCoder.encodeLongSB(random.nextLong(), 4).toString() +
|
|
Base64Order.enhancedCoder.encodeLongSB(random.nextLong(), 4).toString() +
|
|
Base64Order.enhancedCoder.encodeLongSB(random.nextLong(), 4).toString();
|
|
}
|
|
|
|
public static void test(final int testsize) throws RowSpaceExceededException {
|
|
final Row r = new Row(new Column[]{
|
|
new Column("hash", Column.celltype_string, Column.encoder_bytes, 12, "hash")},
|
|
Base64Order.enhancedCoder);
|
|
|
|
// test compare method
|
|
random = new Random(0);
|
|
for (int i = 0; i < testsize; i++) {
|
|
final byte[] a = ASCII.getBytes(randomHash());
|
|
final byte[] b = ASCII.getBytes(randomHash());
|
|
final int c = Base64Order.enhancedCoder.compare(a, b);
|
|
if (c == 0 && Base64Order.enhancedCoder.compare(b, a) != 0)
|
|
System.out.println("compare failed / =; a = " + ASCII.String(a) + ", b = " + ASCII.String(b));
|
|
if (c == -1 && Base64Order.enhancedCoder.compare(b, a) != 1)
|
|
System.out.println("compare failed / =; a < " + ASCII.String(a) + ", b = " + ASCII.String(b));
|
|
if (c == 1 && Base64Order.enhancedCoder.compare(b, a) != -1)
|
|
System.out.println("compare failed / =; a > " + ASCII.String(a) + ", b = " + ASCII.String(b));
|
|
}
|
|
|
|
// test sorting methods
|
|
RowCollection a = new RowCollection(r, testsize);
|
|
a.add("AAAAAAAAAAAA".getBytes());
|
|
a.add("BBBBBBBBBBBB".getBytes());
|
|
a.add("BBBBBBBBBBBB".getBytes());
|
|
a.add("BBBBBBBBBBBB".getBytes());
|
|
a.add("CCCCCCCCCCCC".getBytes());
|
|
final ArrayList<RowCollection> del = a.removeDoubles();
|
|
System.out.println(del + "rows double");
|
|
final Iterator<Row.Entry> j = a.iterator();
|
|
while (j.hasNext()) System.out.println(UTF8.String(j.next().bytes()));
|
|
|
|
System.out.println("kelondroRowCollection test with size = " + testsize);
|
|
a = new RowCollection(r, testsize);
|
|
long t0 = System.nanoTime();
|
|
random = new Random(0);
|
|
for (int i = 0; i < testsize / 2; i++) a.add(randomHash().getBytes());
|
|
//System.out.println("check: after first random feed"); for (final Row.Entry w: a) System.out.println("1 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
|
|
random = new Random(0);
|
|
for (int i = 0; i < testsize / 2; i++) a.add(randomHash().getBytes());
|
|
//System.out.println("check: after second random feed"); for (final Row.Entry w: a) System.out.println("2 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
|
|
a.sort();
|
|
//System.out.println("check: after sort"); for (final Row.Entry w: a) System.out.println("3 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
|
|
a.uniq();
|
|
//System.out.println("check: after sort uniq"); for (final Row.Entry w: a) System.out.println("4 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
|
|
// check order that the element have
|
|
for (int i = 0; i < a.size() - 1; i++) {
|
|
if (a.get(i, false).compareTo(a.get(i + 1, false)) >= 0) System.out.println("Compare error at pos " + i + ": a.get(i)=" + a.get(i, false) + ", a.get(i + 1)=" + a.get(i + 1, false));
|
|
}
|
|
|
|
long t1 = System.nanoTime();
|
|
System.out.println("create a : " + (t1 - t0) + " nanoseconds, " + d(testsize, (t1 - t0)) + " entries/nanoseconds; a.size() = " + a.size());
|
|
|
|
final RowCollection c = new RowCollection(r, testsize);
|
|
random = new Random(0);
|
|
t0 = System.nanoTime();
|
|
for (int i = 0; i < testsize; i++) {
|
|
c.add(randomHash().getBytes());
|
|
}
|
|
t1 = System.nanoTime();
|
|
System.out.println("create c : " + (t1 - t0) + " nanoseconds, " + d(testsize, (t1 - t0)) + " entries/nanoseconds");
|
|
final RowCollection d = new RowCollection(r, testsize);
|
|
for (int i = 0; i < testsize; i++) {
|
|
d.add(c.get(i, false).getPrimaryKeyBytes());
|
|
}
|
|
final long t2 = System.nanoTime();
|
|
System.out.println("copy c -> d: " + (t2 - t1) + " nanoseconds, " + d(testsize, (t2 - t1)) + " entries/nanoseconds");
|
|
//availableCPU = 1;
|
|
c.sort();
|
|
final long t3 = System.nanoTime();
|
|
System.out.println("sort c (1) : " + (t3 - t2) + " nanoseconds, " + d(testsize, (t3 - t2)) + " entries/nanoseconds");
|
|
//availableCPU = 2;
|
|
d.sort();
|
|
final long t4 = System.nanoTime();
|
|
System.out.println("sort d (2) : " + (t4 - t3) + " nanoseconds, " + d(testsize, (t4 - t3)) + " entries/nanoseconds");
|
|
c.uniq();
|
|
final long t5 = System.nanoTime();
|
|
System.out.println("uniq c : " + (t5 - t4) + " nanoseconds, " + d(testsize, (t5 - t4)) + " entries/nanoseconds");
|
|
d.uniq();
|
|
final long t6 = System.nanoTime();
|
|
System.out.println("uniq d : " + (t6 - t5) + " nanoseconds, " + d(testsize, (t6 - t5)) + " entries/nanoseconds");
|
|
random = new Random(0);
|
|
final RowSet e = new RowSet(r, testsize);
|
|
for (int i = 0; i < testsize; i++) {
|
|
e.put(r.newEntry(randomHash().getBytes()));
|
|
}
|
|
final long t7 = System.nanoTime();
|
|
System.out.println("create e : " + (t7 - t6) + " nanoseconds, " + d(testsize, (t7 - t6)) + " entries/nanoseconds");
|
|
e.sort();
|
|
final long t8 = System.nanoTime();
|
|
System.out.println("sort e (2) : " + (t8 - t7) + " nanoseconds, " + d(testsize, (t8 - t7)) + " entries/nanoseconds");
|
|
e.uniq();
|
|
final long t9 = System.nanoTime();
|
|
System.out.println("uniq e : " + (t9 - t8) + " nanoseconds, " + d(testsize, (t9 - t8)) + " entries/nanoseconds");
|
|
final boolean cis = c.isSorted();
|
|
final long t10 = System.nanoTime();
|
|
System.out.println("c isSorted = " + ((cis) ? "true" : "false") + ": " + (t10 - t9) + " nanoseconds");
|
|
final boolean dis = d.isSorted();
|
|
final long t11 = System.nanoTime();
|
|
System.out.println("d isSorted = " + ((dis) ? "true" : "false") + ": " + (t11 - t10) + " nanoseconds");
|
|
final boolean eis = e.isSorted();
|
|
final long t12 = System.nanoTime();
|
|
System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " nanoseconds");
|
|
random = new Random(0);
|
|
boolean allfound = true;
|
|
for (int i = 0; i < testsize; i++) {
|
|
final String rh = randomHash();
|
|
if (e.get(rh.getBytes(), true) == null) {
|
|
allfound = false;
|
|
System.out.println("not found hash " + rh + " at attempt " + i);
|
|
break;
|
|
}
|
|
}
|
|
final long t13 = System.nanoTime();
|
|
System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " nanoseconds");
|
|
boolean noghosts = true;
|
|
for (int i = 0; i < testsize; i++) {
|
|
if (e.get(randomHash().getBytes(), true) != null) {
|
|
noghosts = false;
|
|
break;
|
|
}
|
|
}
|
|
final long t14 = System.nanoTime();
|
|
System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " nanoseconds");
|
|
System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
|
|
System.out.println();
|
|
}
|
|
|
|
public static void main(final String[] args) {
|
|
try {
|
|
test(500000);
|
|
//test(1000);
|
|
//test(50000);
|
|
//test(100000);
|
|
//test(1000000);
|
|
Log.shutdown();
|
|
Array.terminate();
|
|
} catch (final RowSpaceExceededException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
neues sort
|
|
[{hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}]rows double
|
|
AAAAAAAAAAAA
|
|
CCCCCCCCCCCC
|
|
kelondroRowCollection test with size = 50000
|
|
create a : 550687000 nanoseconds, 0 entries/nanoseconds; a.size() = 25000
|
|
create c : 31556000 nanoseconds, 0 entries/nanoseconds
|
|
copy c -> d: 13798000 nanoseconds, 0 entries/nanoseconds
|
|
sort c (1) : 80845000 nanoseconds, 0 entries/nanoseconds
|
|
sort d (2) : 79981000 nanoseconds, 0 entries/nanoseconds
|
|
uniq c : 3697000 nanoseconds, 0 entries/nanoseconds
|
|
uniq d : 3649000 nanoseconds, 0 entries/nanoseconds
|
|
create e : 5719968000 nanoseconds, 0 entries/nanoseconds
|
|
sort e (2) : 65563000 nanoseconds, 0 entries/nanoseconds
|
|
uniq e : 3540000 nanoseconds, 0 entries/nanoseconds
|
|
c isSorted = true: 119000 nanoseconds
|
|
d isSorted = true: 90000 nanoseconds
|
|
e isSorted = true: 94000 nanoseconds
|
|
e allfound = true: 64049000 nanoseconds
|
|
e noghosts = true: 57150000 nanoseconds
|
|
Result size: c = 50000, d = 50000, e = 50000
|
|
|
|
altes plus concurrency
|
|
[{hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}]rows double
|
|
AAAAAAAAAAAA
|
|
CCCCCCCCCCCC
|
|
kelondroRowCollection test with size = 50000
|
|
Compare error at pos 23548: a.get(i)={hash=8dV7ACC_D1ir}, a.get(i + 1)={hash=8Ypevst5u_tV}
|
|
create a : 507683000 nanoseconds, 0 entries/nanoseconds; a.size() = 25001
|
|
create c : 38420000 nanoseconds, 0 entries/nanoseconds
|
|
copy c -> d: 12995000 nanoseconds, 0 entries/nanoseconds
|
|
sort c (1) : 20805000 nanoseconds, 0 entries/nanoseconds
|
|
sort d (2) : 18935000 nanoseconds, 0 entries/nanoseconds
|
|
uniq c : 3712000 nanoseconds, 0 entries/nanoseconds
|
|
uniq d : 3604000 nanoseconds, 0 entries/nanoseconds
|
|
create e : 1333761000 nanoseconds, 0 entries/nanoseconds
|
|
sort e (2) : 16124000 nanoseconds, 0 entries/nanoseconds
|
|
uniq e : 3453000 nanoseconds, 0 entries/nanoseconds
|
|
c isSorted = true: 115000 nanoseconds
|
|
d isSorted = true: 89000 nanoseconds
|
|
e isSorted = true: 94000 nanoseconds
|
|
e allfound = true: 58685000 nanoseconds
|
|
e noghosts = true: 59132000 nanoseconds
|
|
Result size: c = 50000, d = 50000, e = 50000
|
|
|
|
altes ohne concurrency
|
|
[{hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}, {hash=BBBBBBBBBBBB}]rows double
|
|
AAAAAAAAAAAA
|
|
CCCCCCCCCCCC
|
|
kelondroRowCollection test with size = 50000
|
|
Compare error at pos 23548: a.get(i)={hash=8dV7ACC_D1ir}, a.get(i + 1)={hash=8Ypevst5u_tV}
|
|
create a : 502494000 nanoseconds, 0 entries/nanoseconds; a.size() = 25001
|
|
create c : 36062000 nanoseconds, 0 entries/nanoseconds
|
|
copy c -> d: 16164000 nanoseconds, 0 entries/nanoseconds
|
|
sort c (1) : 32442000 nanoseconds, 0 entries/nanoseconds
|
|
sort d (2) : 32025000 nanoseconds, 0 entries/nanoseconds
|
|
uniq c : 3581000 nanoseconds, 0 entries/nanoseconds
|
|
uniq d : 3561000 nanoseconds, 0 entries/nanoseconds
|
|
create e : 1788591000 nanoseconds, 0 entries/nanoseconds
|
|
sort e (2) : 22318000 nanoseconds, 0 entries/nanoseconds
|
|
uniq e : 3438000 nanoseconds, 0 entries/nanoseconds
|
|
c isSorted = true: 113000 nanoseconds
|
|
d isSorted = true: 89000 nanoseconds
|
|
e isSorted = true: 94000 nanoseconds
|
|
e allfound = true: 64161000 nanoseconds
|
|
e noghosts = true: 55975000 nanoseconds
|
|
Result size: c = 50000, d = 50000, e = 50000
|
|
|
|
*/ |