more performance hacks: better default values for scaling, less memory usage

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5708 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 39644dc14e
commit d49238a637

@ -407,9 +407,9 @@ public class URLAnalysis {
public static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException {
System.out.println("COLLECTION INDEX DIFF URL-COL startup");
IntegerHandleIndex idx = new IntegerHandleIndex(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(statisticFile));
IntegerHandleIndex idx = new IntegerHandleIndex(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(statisticFile), 0);
MetadataRepository mr = new MetadataRepository(new File(metadataPath));
HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, 100);
HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, 0, 1000000);
System.out.println("COLLECTION INDEX DIFF URL-COL loaded dump, starting diff");
long start = System.currentTimeMillis();
long update = start - 7000;
@ -436,7 +436,7 @@ public class URLAnalysis {
// format: 0=text, 1=html, 2=rss/xml
System.out.println("URL EXPORT startup");
MetadataRepository mr = new MetadataRepository(new File(metadataPath));
HandleSet hs = (diffFile == null) ? null : new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile));
HandleSet hs = (diffFile == null) ? null : new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile), 0);
System.out.println("URL EXPORT loaded dump, starting export");
Export e = mr.export(new File(export), ".*", hs, format, false);
try {
@ -451,7 +451,7 @@ public class URLAnalysis {
System.out.println("URL DELETE startup");
MetadataRepository mr = new MetadataRepository(new File(metadataPath));
int mrSize = mr.size();
HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile));
HandleSet hs = new HandleSet(MetadataRowContainer.rowdef.primaryKeyLength, MetadataRowContainer.rowdef.objectOrder, new File(diffFile), 0);
System.out.println("URL DELETE loaded dump, starting deletion of " + hs.size() + " entries from " + mrSize);
for (byte[] refhash: hs) {
mr.remove(new String(refhash));

@ -106,7 +106,7 @@ public class HeapReader {
// there is an index and a gap file:
// read the index file:
try {
this.index = new LongHandleIndex(this.keylength, this.ordering, fif);
this.index = new LongHandleIndex(this.keylength, this.ordering, fif, 1000000);
} catch (IOException e) {
e.printStackTrace();
return false;
@ -132,7 +132,7 @@ public class HeapReader {
// this initializes this.index by reading positions from the heap file
this.free = new Gap();
LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))));
LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000);
byte[] key = new byte[keylength];
int reclen;
long seek = 0;

@ -71,7 +71,7 @@ public final class HeapWriter {
public HeapWriter(final File heapFile, final int keylength, final ByteOrder ordering) throws IOException {
this.heapFile = heapFile;
this.keylength = keylength;
this.index = new LongHandleIndex(keylength, ordering, 10);
this.index = new LongHandleIndex(keylength, ordering, 10, 100000);
this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile), 1024 * 1024));
this.seek = 0;
}

@ -42,9 +42,9 @@ public class HandleSet implements Iterable<byte[]> {
private final Row rowdef;
private ObjectIndex index;
public HandleSet(final int keylength, final ByteOrder objectOrder, final int space) {
public HandleSet(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key")}, objectOrder, 0);
this.index = new ObjectIndexCache(rowdef, space);
this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace);
}
/**
@ -54,8 +54,8 @@ public class HandleSet implements Iterable<byte[]> {
* @param file
* @throws IOException
*/
public HandleSet(final int keylength, final ByteOrder objectOrder, final File file) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
public HandleSet(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
byte[] a = new byte[keylength];

@ -55,9 +55,9 @@ public class IntegerHandleIndex {
private final Row rowdef;
private ObjectIndexCache index;
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) {
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0);
this.index = new ObjectIndexCache(rowdef, space);
this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace);
}
/**
@ -67,8 +67,8 @@ public class IntegerHandleIndex {
* @param file
* @throws IOException
*/
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
byte[] a = new byte[keylength + 4];
@ -243,8 +243,8 @@ public class IntegerHandleIndex {
* @param bufferSize
* @return
*/
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space), bufferSize);
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize);
ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer));
service.shutdown();
@ -331,7 +331,7 @@ public class IntegerHandleIndex {
System.gc(); // for resource measurement
long a = MemoryControl.available();
IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0);
IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0, 150000);
for (int i = 0; i < count; i++) {
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)).getBytes(), 1);
}

@ -58,9 +58,9 @@ public class LongHandleIndex {
* @param objectOrder
* @param space
*/
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int space) {
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-8 {b256}")}, objectOrder, 0);
this.index = new ObjectIndexCache(rowdef, space);
this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace);
}
/**
@ -70,8 +70,8 @@ public class LongHandleIndex {
* @param file
* @throws IOException
*/
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
byte[] a = new byte[keylength + 8];
@ -226,8 +226,8 @@ public class LongHandleIndex {
* @param bufferSize
* @return
*/
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space), bufferSize);
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize);
ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer));
service.shutdown();

@ -35,16 +35,17 @@ import de.anomic.kelondro.order.StackIterator;
public class ObjectIndexCache implements ObjectIndex {
private static final int spread = 1000;
private final Row rowdef;
private RowSet index0;
private RowSetArray index1;
private final Row.EntryComparator entryComparator;
private final int spread;
public ObjectIndexCache(final Row rowdef, final int initialspace) {
public ObjectIndexCache(final Row rowdef, final int initialspace, final int expectedspace) {
this.rowdef = rowdef;
this.entryComparator = new Row.EntryComparator(rowdef.objectOrder);
reset(initialspace);
this.spread = Math.max(10, expectedspace / 3000);
reset(initialspace);
}
public void clear() {
@ -66,6 +67,7 @@ public class ObjectIndexCache implements ObjectIndex {
// finish initialization phase
index0.sort();
index0.uniq();
index0.trim(false);
index1 = new RowSetArray(rowdef, 0, spread);
}
}
@ -87,21 +89,6 @@ public class ObjectIndexCache implements ObjectIndex {
return index1.has(key);
}
/*
public synchronized Row.Entry put(final Row.Entry entry) {
assert (entry != null);
finishInitialization();
// if the new entry is within the initialization part, just overwrite it
assert index0.isSorted();
final Row.Entry indexentry = index0.remove(entry.getPrimaryKeyBytes()); // keeps ordering
if (indexentry != null) {
index1.put(entry);
return indexentry;
}
// else place it in the index1
return index1.put(entry);
}
*/
public synchronized Row.Entry replace(final Row.Entry entry) {
assert (entry != null);
finishInitialization();

@ -781,7 +781,7 @@ public class RowCollection implements Iterable<Row.Entry> {
try {
while (i >= 0) {
if (match(i, i + 1)) {
removeRow(i + 1, false);
removeRow(i + 1, true);
d++;
if (i + 1 < chunkcount - 1) u = false;
}

@ -36,7 +36,7 @@ import de.anomic.kelondro.order.NaturalOrder;
public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.Entry> {
private static final int collectionReSortLimit = 400;
private static final int collectionReSortLimit = 300;
public RowSet(final RowSet rs) {
super(rs);
@ -190,7 +190,7 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
if ((this.chunkcount - this.sortBound) > (collectionReSortLimit << 1)) {
if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
sort();
}

@ -132,7 +132,7 @@ public class EcoTable implements ObjectIndex {
table = null; System.gc();
Log.logSevere("ECOTABLE", tablefile + ": RAM after releasing the table: " + (MemoryControl.available() / 1024 / 1024) + "MB");
}
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records);
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records, 100000);
Log.logInfo("ECOTABLE", tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? "DISABLED" : "ENABLED"));
// read all elements from the file into the copy table
@ -586,7 +586,7 @@ public class EcoTable implements ObjectIndex {
// initialize index and copy table
table = (table == null) ? null : new RowSet(taildef, 1);
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1);
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1, 100000);
}
public Row row() {

@ -112,7 +112,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
} catch (final IOException e) {
if (resetOnFail) {
RAMIndex = true;
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0);
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0);
} else {
throw new kelondroException(e.getMessage());
}
@ -122,7 +122,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
public void clear() throws IOException {
super.reset();
RAMIndex = true;
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0);
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0);
}
public static int staticSize(final File path, final String tablename) {
@ -148,7 +148,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
private IntegerHandleIndex initializeRamIndex(final int initialSpace) {
final int space = Math.max(super.col[0].size(), initialSpace) + 1;
if (space < 0) throw new kelondroException("wrong space: " + space);
final IntegerHandleIndex ri = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space);
final IntegerHandleIndex ri = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space, 0);
final Iterator<Node> content = super.col[0].contentNodes(-1);
Node node;
int i;

@ -67,7 +67,7 @@ public class SplitTable implements ObjectIndex {
// the table type can be either kelondroFlex or kelondroEco
private static final int EcoFSBufferSize = 20;
static final ObjectIndex dummyIndex = new ObjectIndexCache(new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, 2, "key")}, NaturalOrder.naturalOrder, 0), 0);
static final ObjectIndex dummyIndex = new ObjectIndexCache(new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, 2, "key")}, NaturalOrder.naturalOrder, 0), 0, 0);
// the thread pool for the keeperOf executor service
private ExecutorService executor;

@ -411,7 +411,7 @@ public class IndexCollection implements Index {
final String[] list = path.list();
FixedWidthArray array;
System.out.println("COLLECTION INDEX REFERENCE COLLECTION startup");
IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000);
IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 0, 1000000);
for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) {
// open array
final int pos = list[i].indexOf('.');

Loading…
Cancel
Save