From 965fabfb87051286d3d21c010a30d90812f717b2 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 24 Aug 2011 10:27:19 +0000 Subject: [PATCH] enhanced sorting speed (affects all DB operations) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7892 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../yacy/kelondro/index/RowCollection.java | 14 ++-- source/net/yacy/kelondro/index/RowSet.java | 73 +++++-------------- 2 files changed, 27 insertions(+), 60 deletions(-) diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index ee3c2ec2a..4372cffcc 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -618,6 +618,7 @@ public class RowCollection implements Sortable, Iterable, if (this.sortBound == this.chunkcount) return; // this is sorted synchronized (this) { if (this.sortBound == this.chunkcount) return; // check again + //Log.logInfo("RowCollection.sort()", "sorting array of size " + this.chunkcount + ", sortBound = " + this.sortBound); net.yacy.cora.sorting.Array.sort(this); this.sortBound = this.chunkcount; } @@ -839,7 +840,7 @@ public class RowCollection implements Sortable, Iterable, return c; } - protected synchronized int compare(final byte[] a, final int astart, final int chunknumber) { + protected int compare(final byte[] a, final int astart, final int chunknumber) { assert (chunknumber < this.chunkcount); assert a.length - astart >= this.rowdef.primaryKeyLength; final int len = Math.min(a.length - astart, this.rowdef.primaryKeyLength); @@ -863,13 +864,14 @@ public class RowCollection implements Sortable, Iterable, return true; } - protected synchronized boolean match(final byte[] a, int astart, final int chunknumber) { + protected boolean match(final byte[] a, int astart, final int chunknumber) { if (chunknumber >= this.chunkcount) return false; - int p = chunknumber * this.rowdef.objectsize; assert a.length - astart >= this.rowdef.primaryKeyLength; - int len = Math.min(a.length - astart, this.rowdef.primaryKeyLength); - while (len-- != 0) { - if (a[astart++] != this.chunkcache[p++]) return false; + for (int p = chunknumber * this.rowdef.objectsize, + len = Math.min(a.length - astart, this.rowdef.primaryKeyLength); + len != 0; + len--, astart++, p++) { + if (a[astart] != this.chunkcache[p]) return false; } return true; } diff --git a/source/net/yacy/kelondro/index/RowSet.java b/source/net/yacy/kelondro/index/RowSet.java index d21aab913..9a1fe56e7 100644 --- a/source/net/yacy/kelondro/index/RowSet.java +++ b/source/net/yacy/kelondro/index/RowSet.java @@ -42,7 +42,7 @@ import net.yacy.kelondro.util.MemoryControl; public class RowSet extends RowCollection implements Index, Iterable { - private static final int collectionReSortLimit = 300; + private static final int collectionReSortLimit = 3000; public RowSet(final RowSet rs) { super(rs); @@ -157,10 +157,6 @@ public class RowSet extends RowCollection implements Index, Iterable public final boolean put(final Row.Entry entry) throws RowSpaceExceededException { assert (entry != null); assert (entry.getPrimaryKeyBytes() != null); - // when reaching a specific amount of un-sorted entries, re-sort all - if ((this.chunkcount - this.sortBound) > collectionReSortLimit) { - sort(); - } synchronized (this) { assert entry.bytes().length >= this.rowdef.primaryKeyLength; final int index = find(entry.bytes(), 0); @@ -576,70 +572,38 @@ public class RowSet extends RowCollection implements Index, Iterable d.trim(); System.out.println("TRIM : " + d.toString()); - - /* // second test - c = new kelondroRowSet(new kelondroRow(new int[]{10, 3})); - c.setOrdering(kelondroNaturalOrder.naturalOrder, 0); - Random rand = new Random(0); + final Row row = new Row("byte[] key-10, Cardinal x-3 {b256}", NaturalOrder.naturalOrder); + RowSet c = new RowSet(row); + final Random rand = new Random(0); long start = System.currentTimeMillis(); - long t, d = 0; + long t; String w; - for (long k = 0; k < 60000; k++) { + for (long k = 1; k <= 60000; k++) { t = System.currentTimeMillis(); w = "a" + Long.toString(rand.nextLong()); - c.add(w.getBytes()); + try { + c.put(row.newEntry(new byte[][]{w.getBytes(), "000".getBytes()})); + //c.add(w.getBytes()); + } catch (final RowSpaceExceededException e) { + e.printStackTrace(); + } if (k % 10000 == 0) System.out.println("added " + k + " entries in " + ((t - start) / 1000) + " seconds, " + (((t - start) > 1000) ? (k / ((t - start) / 1000)) : k) + " entries/second, size = " + c.size()); } - System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); - c.shape(); - System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); + System.out.println("bevore sort: " + (System.currentTimeMillis() - start) + " milliseconds, size: " + c.size()); + c.sort(); + System.out.println("after sort: " + (System.currentTimeMillis() - start) + " milliseconds, size: " + c.size()); c.uniq(); - System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); - System.out.println("RESULT SIZE: " + c.size()); + System.out.println("after uniq: " + (System.currentTimeMillis() - start) + " milliseconds, size: " + c.size()); System.out.println(); - // third test - c = new kelondroRowSet(new kelondroRow(new int[]{10, 3}), 60000); - c.setOrdering(kelondroNaturalOrder.naturalOrder, 0); - rand = new Random(0); - start = System.currentTimeMillis(); - d = 0; - for (long k = 0; k < 60000; k++) { - t = System.currentTimeMillis(); - w = "a" + Long.toString(rand.nextLong()); - if (c.get(w.getBytes(), 0, 10) == null) c.add(w.getBytes()); else d++; - if (k % 10000 == 0) - System.out.println("added " + k + " entries in " + - ((t - start) / 1000) + " seconds, " + - (((t - start) > 1000) ? (k / ((t - start) / 1000)) : k) + - " entries/second, " + d + " double, size = " + c.size() + - ", sum = " + (c.size() + d)); - } - System.out.println("RESULT SIZE: " + c.size()); - */ - /* - // performance test for put - long start = System.currentTimeMillis(); - kelondroRowSet c = new kelondroRowSet(new kelondroRow("byte[] a-12, byte[] b-12"), 0); - Random random = new Random(0); - byte[] key; - for (int i = 0; i < 100000; i++) { - key = randomHash(random); - c.put(c.rowdef.newEntry(new byte[][]{key, key})); - if (i % 1000 == 0) System.out.println(i + " entries. "); - } - System.out.println("RESULT SIZE: " + c.size()); - System.out.println("Time: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); - */ - // remove test - final long start = System.currentTimeMillis(); - final RowSet c = new RowSet(new Row("byte[] a-12, byte[] b-12", Base64Order.enhancedCoder)); + start = System.currentTimeMillis(); + c = new RowSet(new Row("byte[] a-12, byte[] b-12", Base64Order.enhancedCoder)); byte[] key; final int testsize = 5000; final byte[][] delkeys = new byte[testsize / 5][]; @@ -675,6 +639,7 @@ public class RowSet extends RowCollection implements Index, Iterable c.sort(); System.out.println("RESULT SIZE: " + c.size()); System.out.println("Time: " + ((System.currentTimeMillis() - start) / 1000) + " seconds"); + System.exit(0); } public static byte[] randomHash(final long r0, final long r1) {