From d49238a637c2687bea12b650c17a41481455d269 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 13 Mar 2009 10:07:04 +0000 Subject: [PATCH] more performance hacks: better default values for scaling, less memory usage git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5708 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/data/URLAnalysis.java | 8 +++---- .../de/anomic/kelondro/blob/HeapReader.java | 4 ++-- .../de/anomic/kelondro/blob/HeapWriter.java | 2 +- .../de/anomic/kelondro/index/HandleSet.java | 8 +++---- .../kelondro/index/IntegerHandleIndex.java | 14 +++++------ .../kelondro/index/LongHandleIndex.java | 12 +++++----- .../kelondro/index/ObjectIndexCache.java | 23 ++++--------------- .../anomic/kelondro/index/RowCollection.java | 2 +- source/de/anomic/kelondro/index/RowSet.java | 4 ++-- source/de/anomic/kelondro/table/EcoTable.java | 4 ++-- .../de/anomic/kelondro/table/FlexTable.java | 6 ++--- .../de/anomic/kelondro/table/SplitTable.java | 2 +- .../anomic/kelondro/text/IndexCollection.java | 2 +- 13 files changed, 39 insertions(+), 52 deletions(-) diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index 2c0ffb748..110532618 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -407,9 +407,9 @@ public class URLAnalysis { public static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException { System.out.println("COLLECTION INDEX DIFF URL-COL startup"); - IntegerHandleIndex idx = new IntegerHandleIndex(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(statisticFile)); + IntegerHandleIndex idx = new IntegerHandleIndex(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(statisticFile), 0); MetadataRepository mr = new MetadataRepository(new File(metadataPath)); - HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, 100); + HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, 0, 1000000); System.out.println("COLLECTION INDEX DIFF URL-COL loaded dump, starting diff"); long start = System.currentTimeMillis(); long update = start - 7000; @@ -436,7 +436,7 @@ public class URLAnalysis { // format: 0=text, 1=html, 2=rss/xml System.out.println("URL EXPORT startup"); MetadataRepository mr = new MetadataRepository(new File(metadataPath)); - HandleSet hs = (diffFile == null) ? null : new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile)); + HandleSet hs = (diffFile == null) ? null : new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile), 0); System.out.println("URL EXPORT loaded dump, starting export"); Export e = mr.export(new File(export), ".*", hs, format, false); try { @@ -451,7 +451,7 @@ public class URLAnalysis { System.out.println("URL DELETE startup"); MetadataRepository mr = new MetadataRepository(new File(metadataPath)); int mrSize = mr.size(); - HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile)); + HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile), 0); System.out.println("URL DELETE loaded dump, starting deletion of " + hs.size() + " entries from " + mrSize); for (byte[] refhash: hs) { mr.remove(new String(refhash)); diff --git a/source/de/anomic/kelondro/blob/HeapReader.java b/source/de/anomic/kelondro/blob/HeapReader.java index c3de24099..bf661c08f 100644 --- a/source/de/anomic/kelondro/blob/HeapReader.java +++ b/source/de/anomic/kelondro/blob/HeapReader.java @@ -106,7 +106,7 @@ public class HeapReader { // there is an index and a gap file: // read the index file: try { - this.index = new LongHandleIndex(this.keylength, this.ordering, fif); + this.index = new LongHandleIndex(this.keylength, this.ordering, fif, 1000000); } catch (IOException e) { e.printStackTrace(); return false; @@ -132,7 +132,7 @@ public class HeapReader { // this initializes the this.index object by reading positions from the heap file this.free = new Gap(); - LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024)))); + LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000); byte[] key = new byte[keylength]; int reclen; long seek = 0; diff --git a/source/de/anomic/kelondro/blob/HeapWriter.java b/source/de/anomic/kelondro/blob/HeapWriter.java index 820c3c8c7..08bd6b9e5 100644 --- a/source/de/anomic/kelondro/blob/HeapWriter.java +++ b/source/de/anomic/kelondro/blob/HeapWriter.java @@ -71,7 +71,7 @@ public final class HeapWriter { public HeapWriter(final File heapFile, final int keylength, final ByteOrder ordering) throws IOException { this.heapFile = heapFile; this.keylength = keylength; - this.index = new LongHandleIndex(keylength, ordering, 10); + this.index = new LongHandleIndex(keylength, ordering, 10, 100000); this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile), 1024 * 1024)); this.seek = 0; } diff --git a/source/de/anomic/kelondro/index/HandleSet.java b/source/de/anomic/kelondro/index/HandleSet.java index f753b5537..ce45ee761 100644 --- a/source/de/anomic/kelondro/index/HandleSet.java +++ b/source/de/anomic/kelondro/index/HandleSet.java @@ -42,9 +42,9 @@ public class HandleSet implements Iterable { private final Row rowdef; private ObjectIndex index; - public HandleSet(final int keylength, final ByteOrder objectOrder, final int space) { + public HandleSet(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) { this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key")}, objectOrder, 0); - this.index = new ObjectIndexCache(rowdef, space); + this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace); } /** @@ -54,8 +54,8 @@ public class HandleSet implements Iterable { * @param file * @throws IOException */ - public HandleSet(final int keylength, final ByteOrder objectOrder, final File file) throws IOException { - this(keylength, objectOrder, (int) (file.length() / (keylength + 8))); + public HandleSet(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException { + this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); // read the index dump and fill the index InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); byte[] a = new byte[keylength]; diff --git a/source/de/anomic/kelondro/index/IntegerHandleIndex.java b/source/de/anomic/kelondro/index/IntegerHandleIndex.java index f007f087c..e7f5fb723 100644 --- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java +++ b/source/de/anomic/kelondro/index/IntegerHandleIndex.java @@ -55,9 +55,9 @@ public class IntegerHandleIndex { private final Row rowdef; private ObjectIndexCache index; - public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) { + public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) { this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0); - this.index = new ObjectIndexCache(rowdef, space); + this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace); } /** @@ -67,8 +67,8 @@ public class IntegerHandleIndex { * @param file * @throws IOException */ - public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException { - this(keylength, objectOrder, (int) (file.length() / (keylength + 8))); + public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException { + this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); // read the index dump and fill the index InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); byte[] a = new byte[keylength + 4]; @@ -243,8 +243,8 @@ public class IntegerHandleIndex { * @param bufferSize * @return */ - public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, int bufferSize) { - initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space), bufferSize); + public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) { + initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize); ExecutorService service = Executors.newSingleThreadExecutor(); initializer.setResult(service.submit(initializer)); service.shutdown(); @@ -331,7 +331,7 @@ public class IntegerHandleIndex { System.gc(); // for resource measurement long a = MemoryControl.available(); - IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0); + IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0, 150000); for (int i = 0; i < count; i++) { idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)).getBytes(), 1); } diff --git a/source/de/anomic/kelondro/index/LongHandleIndex.java b/source/de/anomic/kelondro/index/LongHandleIndex.java index 6fb2b98ec..f347ee23e 100644 --- a/source/de/anomic/kelondro/index/LongHandleIndex.java +++ b/source/de/anomic/kelondro/index/LongHandleIndex.java @@ -58,9 +58,9 @@ public class LongHandleIndex { * @param objectOrder * @param space */ - public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) { + public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) { this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-8 {b256}")}, objectOrder, 0); - this.index = new ObjectIndexCache(rowdef, space); + this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace); } /** @@ -70,8 +70,8 @@ public class LongHandleIndex { * @param file * @throws IOException */ - public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException { - this(keylength, objectOrder, (int) (file.length() / (keylength + 8))); + public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException { + this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); // read the index dump and fill the index InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); byte[] a = new byte[keylength + 8]; @@ -226,8 +226,8 @@ public class LongHandleIndex { * @param bufferSize * @return */ - public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, int bufferSize) { - initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space), bufferSize); + public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) { + initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize); ExecutorService service = Executors.newSingleThreadExecutor(); initializer.setResult(service.submit(initializer)); service.shutdown(); diff --git a/source/de/anomic/kelondro/index/ObjectIndexCache.java b/source/de/anomic/kelondro/index/ObjectIndexCache.java index 2a992889f..130f0d10e 100644 --- a/source/de/anomic/kelondro/index/ObjectIndexCache.java +++ b/source/de/anomic/kelondro/index/ObjectIndexCache.java @@ -35,16 +35,17 @@ import de.anomic.kelondro.order.StackIterator; public class ObjectIndexCache implements ObjectIndex { - private static final int spread = 1000; private final Row rowdef; private RowSet index0; private RowSetArray index1; private final Row.EntryComparator entryComparator; + private final int spread; - public ObjectIndexCache(final Row rowdef, final int initialspace) { + public ObjectIndexCache(final Row rowdef, final int initialspace, final int expectedspace) { this.rowdef = rowdef; this.entryComparator = new Row.EntryComparator(rowdef.objectOrder); - reset(initialspace); + this.spread = Math.max(10, expectedspace / 3000); + reset(initialspace); } public void clear() { @@ -66,6 +67,7 @@ public class ObjectIndexCache implements ObjectIndex { // finish initialization phase index0.sort(); index0.uniq(); + index0.trim(false); index1 = new RowSetArray(rowdef, 0, spread); } } @@ -87,21 +89,6 @@ public class ObjectIndexCache implements ObjectIndex { return index1.has(key); } - /* - public synchronized Row.Entry put(final Row.Entry entry) { - assert (entry != null); - finishInitialization(); - // if the new entry is within the initialization part, just overwrite it - assert index0.isSorted(); - final Row.Entry indexentry = index0.remove(entry.getPrimaryKeyBytes()); // keeps ordering - if (indexentry != null) { - index1.put(entry); - return indexentry; - } - // else place it in the index1 - return index1.put(entry); - } - */ public synchronized Row.Entry replace(final Row.Entry entry) { assert (entry != null); finishInitialization(); diff --git a/source/de/anomic/kelondro/index/RowCollection.java b/source/de/anomic/kelondro/index/RowCollection.java index fe7ff59bc..86c43b788 100644 --- a/source/de/anomic/kelondro/index/RowCollection.java +++ b/source/de/anomic/kelondro/index/RowCollection.java @@ -781,7 +781,7 @@ public class RowCollection implements Iterable { try { while (i >= 0) { if (match(i, i + 1)) { - removeRow(i + 1, false); + removeRow(i + 1, true); d++; if (i + 1 < chunkcount - 1) u = false; } diff --git a/source/de/anomic/kelondro/index/RowSet.java b/source/de/anomic/kelondro/index/RowSet.java index 084a91721..d03b9f36a 100644 --- a/source/de/anomic/kelondro/index/RowSet.java +++ b/source/de/anomic/kelondro/index/RowSet.java @@ -36,7 +36,7 @@ import de.anomic.kelondro.order.NaturalOrder; public class RowSet extends RowCollection implements ObjectIndex, Iterable { - private static final int collectionReSortLimit = 400; + private static final int collectionReSortLimit = 300; public RowSet(final RowSet rs) { super(rs); @@ -190,7 +190,7 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable (collectionReSortLimit << 1)) { + if ((this.chunkcount - this.sortBound) > collectionReSortLimit) { sort(); } diff --git a/source/de/anomic/kelondro/table/EcoTable.java b/source/de/anomic/kelondro/table/EcoTable.java index 6974b49db..f283f8aa0 100644 --- a/source/de/anomic/kelondro/table/EcoTable.java +++ b/source/de/anomic/kelondro/table/EcoTable.java @@ -132,7 +132,7 @@ public class EcoTable implements ObjectIndex { table = null; System.gc(); Log.logSevere("ECOTABLE", tablefile + ": RAM after releasing the table: " + (MemoryControl.available() / 1024 / 1024) + "MB"); } - index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records); + index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records, 100000); Log.logInfo("ECOTABLE", tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? "DISABLED" : "ENABLED")); // read all elements from the file into the copy table @@ -586,7 +586,7 @@ public class EcoTable implements ObjectIndex { // initialize index and copy table table = (table == null) ? null : new RowSet(taildef, 1); - index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1); + index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1, 100000); } public Row row() { diff --git a/source/de/anomic/kelondro/table/FlexTable.java b/source/de/anomic/kelondro/table/FlexTable.java index f81180c86..f2145342b 100644 --- a/source/de/anomic/kelondro/table/FlexTable.java +++ b/source/de/anomic/kelondro/table/FlexTable.java @@ -112,7 +112,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { } catch (final IOException e) { if (resetOnFail) { RAMIndex = true; - index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0); + index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0); } else { throw new kelondroException(e.getMessage()); } @@ -122,7 +122,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { public void clear() throws IOException { super.reset(); RAMIndex = true; - index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0); + index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0); } public static int staticSize(final File path, final String tablename) { @@ -148,7 +148,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { private IntegerHandleIndex initializeRamIndex(final int initialSpace) { final int space = Math.max(super.col[0].size(), initialSpace) + 1; if (space < 0) throw new kelondroException("wrong space: " + space); - final IntegerHandleIndex ri = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space); + final IntegerHandleIndex ri = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space, 0); final Iterator content = super.col[0].contentNodes(-1); Node node; int i; diff --git a/source/de/anomic/kelondro/table/SplitTable.java b/source/de/anomic/kelondro/table/SplitTable.java index ea94617c4..b0c16440f 100644 --- a/source/de/anomic/kelondro/table/SplitTable.java +++ b/source/de/anomic/kelondro/table/SplitTable.java @@ -67,7 +67,7 @@ public class SplitTable implements ObjectIndex { // the table type can be either kelondroFlex or kelondroEco private static final int EcoFSBufferSize = 20; - static final ObjectIndex dummyIndex = new ObjectIndexCache(new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, 2, "key")}, NaturalOrder.naturalOrder, 0), 0); + static final ObjectIndex dummyIndex = new ObjectIndexCache(new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, 2, "key")}, NaturalOrder.naturalOrder, 0), 0, 0); // the thread pool for the keeperOf executor service private ExecutorService executor; diff --git a/source/de/anomic/kelondro/text/IndexCollection.java b/source/de/anomic/kelondro/text/IndexCollection.java index ca2976941..e3a3902fd 100644 --- a/source/de/anomic/kelondro/text/IndexCollection.java +++ b/source/de/anomic/kelondro/text/IndexCollection.java @@ -411,7 +411,7 @@ public class IndexCollection implements Index { final String[] list = path.list(); FixedWidthArray array; System.out.println("COLLECTION INDEX REFERENCE COLLECTION startup"); - IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000); + IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 0, 1000000); for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) { // open array final int pos = list[i].indexOf('.');