From b80db04667cde64d8e41692bb6fa7780098ac4c5 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 8 Mar 2009 21:37:17 +0000 Subject: [PATCH] - refactoring of IntegerHandleIndex and LongHandleIndex (better method names) - fix for problem in httpdFileHandler: missing close of open Files if template cache was disabled - more memory for DHT selection required - stub for URL reference hash statistics in index collections git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5682 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/data/URLAnalysis.java | 12 +- source/de/anomic/http/httpdFileHandler.java | 64 +++++------ source/de/anomic/kelondro/blob/BLOBHeap.java | 8 +- .../kelondro/blob/BLOBHeapModifier.java | 6 +- .../de/anomic/kelondro/blob/HeapReader.java | 8 +- .../de/anomic/kelondro/blob/HeapWriter.java | 4 +- .../kelondro/index/IntegerHandleIndex.java | 92 ++++++++++++++- .../kelondro/index/LongHandleIndex.java | 39 ++++++- source/de/anomic/kelondro/table/EcoTable.java | 34 +++--- .../de/anomic/kelondro/table/FlexTable.java | 26 ++--- .../anomic/kelondro/text/IndexCollection.java | 105 ++++++++++++++---- source/de/anomic/kelondro/util/FileUtils.java | 1 + .../de/anomic/plasma/plasmaSwitchboard.java | 2 +- 13 files changed, 290 insertions(+), 111 deletions(-) diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index febd203ec..2ee600b34 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -380,7 +380,13 @@ public class URLAnalysis { System.out.println("finished"); } - + /* + public static void used(String collectionPath, String statisticPath) { + File collections = new File(collectionPath); + File out = new File(statisticPath); + IntegerHandleIndex idx = IndexCollection.referenceHashes(collctions, filenameStub, keylength, indexOrder, payloadrow) + } +*/ public static void main(String[] args) { // example: java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -stat
DATA/EXPORT/urls1.txt.gz if (args[0].equals("-stat") && args.length >= 2) { @@ -388,7 +394,9 @@ public class URLAnalysis { } else if (args[0].equals("-host") && args.length >= 2) { for (int i = 1; i < args.length; i++) genhost(args[i]); } else if (args[0].equals("-sort") && args.length >= 2) { - for (int i = 1; i < args.length; i++) sortsplit(args[i]); + for (int i = 1; i < args.length; i++) sortsplit(args[i]); + //} else if (args[0].equals("-incollection") && args.length >= 2) { + // used(args[1], args[2]); } else { System.out.println("usage:"); System.out.println("-stat generate a statistics about common words in file, store to .stat"); diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java index f612c24e4..5682b2972 100644 --- a/source/de/anomic/http/httpdFileHandler.java +++ b/source/de/anomic/http/httpdFileHandler.java @@ -86,6 +86,7 @@ import de.anomic.kelondro.util.ByteBuffer; import de.anomic.kelondro.util.DateFormatter; import de.anomic.kelondro.util.Log; import de.anomic.kelondro.util.FileUtils; +import de.anomic.kelondro.util.MemoryControl; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboardConstants; @@ -812,45 +813,43 @@ public final class httpdFileHandler { // read the file/template TemplateCacheEntry templateCacheEntry = null; - if (useTemplateCache) { - final long fileSize = targetFile.length(); - if (fileSize <= 512 * 1024) { - // read from cache - SoftReference ref = templateCache.get(targetFile); - if (ref != null) { - templateCacheEntry = ref.get(); - if (templateCacheEntry == null) templateCache.remove(targetFile); - } + long fileSize = targetFile.length(); + if (useTemplateCache && fileSize <= 512 * 1024) { + // read from cache + SoftReference ref = templateCache.get(targetFile); + if (ref != null) { + templateCacheEntry = ref.get(); + if (templateCacheEntry == null) templateCache.remove(targetFile); + } - Date targetFileDate 
= new Date(targetFile.lastModified()); - if (templateCacheEntry == null || targetFileDate.after(templateCacheEntry.lastModified)) { - // loading the content of the template file into - // a byte array - templateCacheEntry = new TemplateCacheEntry(); - templateCacheEntry.lastModified = targetFileDate; - templateCacheEntry.content = FileUtils.read(targetFile); + Date targetFileDate = new Date(targetFile.lastModified()); + if (templateCacheEntry == null || targetFileDate.after(templateCacheEntry.lastModified)) { + // loading the content of the template file into + // a byte array + templateCacheEntry = new TemplateCacheEntry(); + templateCacheEntry.lastModified = targetFileDate; + templateCacheEntry.content = FileUtils.read(targetFile); - // storing the content into the cache - ref = new SoftReference(templateCacheEntry); - templateCache.put(targetFile, ref); - if (theLogger.isFinest()) theLogger.logFinest("Cache MISS for file " + targetFile); - } else { - if (theLogger.isFinest()) theLogger.logFinest("Cache HIT for file " + targetFile); - } - - // creating an inputstream needed by the template - // rewrite function - fis = new ByteArrayInputStream(templateCacheEntry.content); - templateCacheEntry = null; + // storing the content into the cache + ref = new SoftReference(templateCacheEntry); + templateCache.put(targetFile, ref); + if (theLogger.isFinest()) theLogger.logFinest("Cache MISS for file " + targetFile); } else { - // read from file directly - fis = new BufferedInputStream(new FileInputStream(targetFile)); + if (theLogger.isFinest()) theLogger.logFinest("Cache HIT for file " + targetFile); } + + // creating an inputstream needed by the template + // rewrite function + fis = new ByteArrayInputStream(templateCacheEntry.content); + templateCacheEntry = null; + } else if (fileSize <= Math.min(4 * 1024 * 1204, MemoryControl.available() / 100)) { + // read file completely into ram, avoid that too many files are open at the same time + fis = new 
ByteArrayInputStream(FileUtils.read(targetFile)); } else { fis = new BufferedInputStream(new FileInputStream(targetFile)); } - if(mimeType.startsWith("text")) { + if (mimeType.startsWith("text")) { // every text-file distributed by yacy is UTF-8 if(!path.startsWith("/repository")) { mimeType = mimeType + "; charset=UTF-8"; @@ -880,6 +879,7 @@ public final class httpdFileHandler { final ByteBuffer o = new ByteBuffer(); // apply templates httpTemplate.writeTemplate(fis, o, templatePatterns, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); + fis.close(); httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, -1, targetDate, null, (templatePatterns == null) ? new httpResponseHeader() : templatePatterns.getOutgoingHeader(), null, "chunked", nocache); // send the content in chunked parts, see RFC 2616 section 3.6.1 final httpChunkedOutputStream chos = new httpChunkedOutputStream(out); @@ -892,7 +892,7 @@ public final class httpdFileHandler { // apply templates final ByteBuffer o1 = new ByteBuffer(); httpTemplate.writeTemplate(fis, o1, templatePatterns, "-UNRESOLVED_PATTERN-".getBytes("UTF-8")); - + fis.close(); final ByteBuffer o = new ByteBuffer(); if (zipContent) { diff --git a/source/de/anomic/kelondro/blob/BLOBHeap.java b/source/de/anomic/kelondro/blob/BLOBHeap.java index a91618b0c..3179a37a7 100755 --- a/source/de/anomic/kelondro/blob/BLOBHeap.java +++ b/source/de/anomic/kelondro/blob/BLOBHeap.java @@ -139,7 +139,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { file.writeInt(key.length + blob.length); file.write(key); file.write(blob, 0, blob.length); - index.putl(key, pos); + index.put(key, pos); } /** @@ -167,7 +167,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { entry = i.next(); key = entry.getKey().getBytes(); blob = entry.getValue(); - index.putl(key, posFile); + index.put(key, posFile); b = AbstractRandomAccess.int2array(key.length + blob.length); assert b.length == 4; assert posBuffer + 4 < 
ba.length : "posBuffer = " + posBuffer + ", ba.length = " + ba.length; @@ -307,7 +307,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { file.write(b); // add the entry to the index - this.index.putl(key, entry.getKey()); + this.index.put(key, entry.getKey()); // remove the entry from the free list i.remove(); @@ -338,7 +338,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { file.write(b); // add the index to the new entry - index.putl(key, lseek); + index.put(key, lseek); // define the new empty entry final int newfreereclen = lsize - reclen - 4; diff --git a/source/de/anomic/kelondro/blob/BLOBHeapModifier.java b/source/de/anomic/kelondro/blob/BLOBHeapModifier.java index f60c63143..0a084d7cb 100644 --- a/source/de/anomic/kelondro/blob/BLOBHeapModifier.java +++ b/source/de/anomic/kelondro/blob/BLOBHeapModifier.java @@ -146,7 +146,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB { assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; // check if the index contains the key - final long seek = index.getl(key); + final long seek = index.get(key); if (seek < 0) return; // access the file and read the container @@ -168,7 +168,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB { this.file.write(fill, 0, size); // remove entry from index - this.index.removel(key); + this.index.remove(key); // recursively merge gaps tryMergeNextGaps(seek, size); @@ -281,7 +281,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB { assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; // check if the index contains the key - final long pos = index.getl(key); + final long pos = index.get(key); if (pos < 0) return 0; // access the file and read the container diff --git a/source/de/anomic/kelondro/blob/HeapReader.java b/source/de/anomic/kelondro/blob/HeapReader.java index 996fcd485..3b564e7ba 
100644 --- a/source/de/anomic/kelondro/blob/HeapReader.java +++ b/source/de/anomic/kelondro/blob/HeapReader.java @@ -73,7 +73,7 @@ public class HeapReader { boolean ok = true; while (i.hasNext() && c-- > 0) { b = i.next(); - pos = this.index.getl(b); + pos = this.index.get(b); file.seek(pos + 4); file.readFully(b1, 0, b1.length); if (this.ordering.compare(b, b1) != 0) { @@ -210,7 +210,7 @@ public class HeapReader { // check if the file index contains the key try { - return index.getl(key) >= 0; + return index.get(key) >= 0; } catch (final IOException e) { e.printStackTrace(); return false; @@ -231,7 +231,7 @@ public class HeapReader { assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; // check if the index contains the key - final long pos = index.getl(key); + final long pos = index.get(key); if (pos < 0) return null; // access the file and read the container @@ -270,7 +270,7 @@ public class HeapReader { assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; // check if the index contains the key - final long pos = index.getl(key); + final long pos = index.get(key); if (pos < 0) return -1; // access the file and read the size of the container diff --git a/source/de/anomic/kelondro/blob/HeapWriter.java b/source/de/anomic/kelondro/blob/HeapWriter.java index 2920639b6..820c3c8c7 100644 --- a/source/de/anomic/kelondro/blob/HeapWriter.java +++ b/source/de/anomic/kelondro/blob/HeapWriter.java @@ -87,13 +87,13 @@ public final class HeapWriter { assert blob.length > 0; assert key.length == this.keylength; assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length; - assert index.getl(key) < 0; // must not occur before + assert index.get(key) < 0; // must not occur before if ((blob == null) || (blob.length == 0)) return; int chunkl = key.length + blob.length; os.writeInt(chunkl); os.write(key); os.write(blob); - index.addl(key, seek); + 
index.putUnique(key, seek); this.seek += chunkl + 4; } diff --git a/source/de/anomic/kelondro/index/IntegerHandleIndex.java b/source/de/anomic/kelondro/index/IntegerHandleIndex.java index 41fd5e184..f02e1db47 100644 --- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java +++ b/source/de/anomic/kelondro/index/IntegerHandleIndex.java @@ -24,8 +24,16 @@ package de.anomic.kelondro.index; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.ArrayList; +import java.util.Iterator; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; @@ -46,6 +54,51 @@ public class IntegerHandleIndex { this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0); this.index = new ObjectIndexCache(rowdef, space); } + + /** + * initialize a BytesLongMap with the content of a dumped index + * @param keylength + * @param objectOrder + * @param file + * @throws IOException + */ + public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException { + this(keylength, objectOrder, (int) (file.length() / (keylength + 8))); + // read the index dump and fill the index + InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); + byte[] a = new byte[keylength + 8]; + int c; + while (true) { + c = is.read(a); + if (c <= 0) break; + this.index.addUnique(this.rowdef.newEntry(a)); + } + is.close(); + assert this.index.size() == file.length() / (keylength + 8); + } + + /** + * write a dump of the index to a file. 
All entries are written in order + * which makes it possible to read them again in a fast way + * @param file + * @return the number of written entries + * @throws IOException + */ + public int dump(File file) throws IOException { + // we must use an iterator from the combined index, because we need the entries sorted + // otherwise we could just write the byte[] from the in kelondroRowSet which would make + // everything much faster, but this is not an option here. + Iterator i = this.index.rows(true, null); + OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024); + int c = 0; + while (i.hasNext()) { + os.write(i.next().bytes()); + c++; + } + os.flush(); + os.close(); + return c; + } public Row row() { return index.row(); @@ -60,14 +113,14 @@ public class IntegerHandleIndex { return index.has(key); } - public synchronized int geti(final byte[] key) throws IOException { + public synchronized int get(final byte[] key) throws IOException { assert (key != null); final Row.Entry indexentry = index.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } - public synchronized int puti(final byte[] key, final int i) throws IOException { + public synchronized int put(final byte[] key, final int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); final Row.Entry newentry = index.row().newEntry(); @@ -78,7 +131,34 @@ public class IntegerHandleIndex { return (int) oldentry.getColLong(1); } - public synchronized void addi(final byte[] key, final int i) throws IOException { + public synchronized int add(final byte[] key, int a) throws IOException { + assert key != null; + assert a > 0; // it does not make sense to add 0. 
If this occurres, it is a performance issue + + final Row.Entry indexentry = index.get(key); + if (indexentry == null) { + final Row.Entry newentry = this.rowdef.newEntry(); + newentry.setCol(0, key); + newentry.setCol(1, a); + index.addUnique(newentry); + return 1; + } else { + int i = (int) indexentry.getColLong(1) + a; + indexentry.setCol(1, i); + index.put(indexentry); + return i; + } + } + + public synchronized int inc(final byte[] key) throws IOException { + return add(key, 1); + } + + public synchronized int dec(final byte[] key) throws IOException { + return add(key, -1); + } + + public synchronized void putUnique(final byte[] key, final int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); final Row.Entry newentry = this.rowdef.newEntry(); @@ -105,14 +185,14 @@ public class IntegerHandleIndex { return report; } - public synchronized int removei(final byte[] key) throws IOException { + public synchronized int remove(final byte[] key) throws IOException { assert (key != null); final Row.Entry indexentry = index.remove(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } - public synchronized int removeonei() throws IOException { + public synchronized int removeone() throws IOException { final Row.Entry indexentry = index.removeOne(); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); @@ -222,7 +302,7 @@ public class IntegerHandleIndex { try { entry c; while ((c = cache.take()) != poisonEntry) { - map.addi(c.key, c.l); + map.putUnique(c.key, c.l); } } catch (InterruptedException e) { e.printStackTrace(); diff --git a/source/de/anomic/kelondro/index/LongHandleIndex.java b/source/de/anomic/kelondro/index/LongHandleIndex.java index 30201f280..d51e98736 100644 --- a/source/de/anomic/kelondro/index/LongHandleIndex.java +++ b/source/de/anomic/kelondro/index/LongHandleIndex.java @@ -116,14 +116,14 @@ public class LongHandleIndex { index.clear(); } - public synchronized long getl(final byte[] 
key) throws IOException { + public synchronized long get(final byte[] key) throws IOException { assert (key != null); final Row.Entry indexentry = index.get(key); if (indexentry == null) return -1; return indexentry.getColLong(1); } - public synchronized long putl(final byte[] key, final long l) throws IOException { + public synchronized long put(final byte[] key, final long l) throws IOException { assert l >= 0 : "l = " + l; assert (key != null); final Row.Entry newentry = index.row().newEntry(); @@ -134,7 +134,7 @@ public class LongHandleIndex { return oldentry.getColLong(1); } - public synchronized void addl(final byte[] key, final long l) throws IOException { + public synchronized void putUnique(final byte[] key, final long l) throws IOException { assert l >= 0 : "l = " + l; assert (key != null); final Row.Entry newentry = this.rowdef.newEntry(); @@ -143,6 +143,33 @@ public class LongHandleIndex { index.addUnique(newentry); } + public synchronized long add(final byte[] key, long a) throws IOException { + assert key != null; + assert a > 0; // it does not make sense to add 0. 
If this occurres, it is a performance issue + + final Row.Entry indexentry = index.get(key); + if (indexentry == null) { + final Row.Entry newentry = this.rowdef.newEntry(); + newentry.setCol(0, key); + newentry.setCol(1, a); + index.addUnique(newentry); + return 1; + } else { + long i = indexentry.getColLong(1) + a; + indexentry.setCol(1, i); + index.put(indexentry); + return i; + } + } + + public synchronized long inc(final byte[] key) throws IOException { + return add(key, 1); + } + + public synchronized long dec(final byte[] key) throws IOException { + return add(key, -1); + } + public synchronized ArrayList removeDoubles() throws IOException { final ArrayList indexreport = index.removeDoubles(); final ArrayList report = new ArrayList(); @@ -159,14 +186,14 @@ public class LongHandleIndex { return report; } - public synchronized long removel(final byte[] key) throws IOException { + public synchronized long remove(final byte[] key) throws IOException { assert (key != null); final Row.Entry indexentry = index.remove(key); if (indexentry == null) return -1; return indexentry.getColLong(1); } - public synchronized long removeonel() throws IOException { + public synchronized long removeone() throws IOException { final Row.Entry indexentry = index.removeOne(); if (indexentry == null) return -1; return indexentry.getColLong(1); @@ -273,7 +300,7 @@ public class LongHandleIndex { try { entry c; while ((c = cache.take()) != poisonEntry) { - map.addl(c.key, c.l); + map.putUnique(c.key, c.l); } } catch (InterruptedException e) { e.printStackTrace(); diff --git a/source/de/anomic/kelondro/table/EcoTable.java b/source/de/anomic/kelondro/table/EcoTable.java index 016d6a35b..014b2076a 100644 --- a/source/de/anomic/kelondro/table/EcoTable.java +++ b/source/de/anomic/kelondro/table/EcoTable.java @@ -146,7 +146,7 @@ public class EcoTable implements ObjectIndex { // write the key into the index table assert key != null; if (key == null) {i++; continue;} - index.addi(key, i++); + 
index.putUnique(key, i++); } } else { byte[] record; @@ -159,7 +159,7 @@ public class EcoTable implements ObjectIndex { System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); // write the key into the index table - index.addi(key, i++); + index.putUnique(key, i++); // write the tail into the table table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true)); @@ -187,7 +187,7 @@ public class EcoTable implements ObjectIndex { for (final Integer[] ds: doubles) { file.get(ds[0].intValue(), record, 0); System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); - index.addi(key, ds[0].intValue()); + index.putUnique(key, ds[0].intValue()); } // then remove the other doubles by removing them from the table, but do a re-indexing while doing that // first aggregate all the delete positions because the elements from the top positions must be removed first @@ -264,7 +264,7 @@ public class EcoTable implements ObjectIndex { assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); final int i = (int) file.size(); - index.addi(row.getPrimaryKeyBytes(), i); + index.putUnique(row.getPrimaryKeyBytes(), i); if (table != null) { assert table.size() == i; table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true)); @@ -347,7 +347,7 @@ public class EcoTable implements ObjectIndex { if ((file == null) || (index == null)) return null; assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail; assert ((table == null) || (table.size() == index.size())); - final int i = index.geti(key); + final int i = index.get(key); if (i == -1) return null; final byte[] b = new byte[rowdef.objectsize]; if (table == null) { @@ -388,7 +388,7 @@ public class EcoTable implements ObjectIndex { assert row != null; assert row.bytes() != null; if ((row == null) || (row.bytes() == null)) 
return null; - final int i = index.geti(row.getPrimaryKeyBytes()); + final int i = index.get(row.getPrimaryKeyBytes()); if (i == -1) { addUnique(row); return null; @@ -441,7 +441,7 @@ public class EcoTable implements ObjectIndex { file.put(i, p, 0); final byte[] k = new byte[rowdef.primaryKeyLength]; System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); - index.puti(k, i); + index.put(k, i); } } else { if (i == index.size() - 1) { @@ -456,7 +456,7 @@ public class EcoTable implements ObjectIndex { file.cleanLast(p, 0); file.put(i, p, 0); final Row.Entry lr = rowdef.newEntry(p); - index.puti(lr.getPrimaryKeyBytes(), i); + index.put(lr.getPrimaryKeyBytes(), i); } } } @@ -465,7 +465,7 @@ public class EcoTable implements ObjectIndex { assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); assert key.length == rowdef.primaryKeyLength; - final int i = index.geti(key); + final int i = index.get(key); if (i == -1) return null; // nothing to do // prepare result @@ -476,19 +476,19 @@ public class EcoTable implements ObjectIndex { assert i < index.size(); if (table == null) { if (i == index.size() - 1) { - ix = index.removei(key); + ix = index.remove(key); assert ix == i; file.cleanLast(b, 0); } else { assert i < index.size() - 1; - ix = index.removei(key); + ix = index.remove(key); assert ix == i; file.get(i, b, 0); file.cleanLast(p, 0); file.put(i, p, 0); final byte[] k = new byte[rowdef.primaryKeyLength]; System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); - index.puti(k, i); + index.put(k, i); } assert (file.size() == index.size() + fail); } else { @@ -499,13 +499,13 @@ public class EcoTable implements ObjectIndex { if (i == index.size() - 1) { // special handling if the entry is the last entry in the file - ix = index.removei(key); + ix = index.remove(key); assert ix == i; table.removeRow(i, false); file.cleanLast(); } else { // switch values - ix = 
index.removei(key); + ix = index.remove(key); assert ix == i; final Row.Entry te = table.removeOne(); @@ -514,7 +514,7 @@ public class EcoTable implements ObjectIndex { file.cleanLast(p, 0); file.put(i, p, 0); final Row.Entry lr = rowdef.newEntry(p); - index.puti(lr.getPrimaryKeyBytes(), i); + index.put(lr.getPrimaryKeyBytes(), i); } assert (file.size() == index.size() + fail); assert (table.size() == index.size()) : "table.size() = " + table.size() + ", index.size() = " + index.size(); @@ -531,7 +531,7 @@ public class EcoTable implements ObjectIndex { final byte[] le = new byte[rowdef.objectsize]; file.cleanLast(le, 0); final Row.Entry lr = rowdef.newEntry(le); - final int i = index.removei(lr.getPrimaryKeyBytes()); + final int i = index.remove(lr.getPrimaryKeyBytes()); assert i >= 0; if (table != null) table.removeOne(); assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); @@ -645,7 +645,7 @@ public class EcoTable implements ObjectIndex { assert k != null; if (k == null) return null; try { - this.c = index.geti(k); + this.c = index.get(k); } catch (final IOException e) { e.printStackTrace(); return null; diff --git a/source/de/anomic/kelondro/table/FlexTable.java b/source/de/anomic/kelondro/table/FlexTable.java index 9cfa2cef3..3052adc05 100644 --- a/source/de/anomic/kelondro/table/FlexTable.java +++ b/source/de/anomic/kelondro/table/FlexTable.java @@ -164,7 +164,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { } assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator //System.out.println("ENTRY: " + serverLog.arrayList(indexentry.bytes(), 0, indexentry.objectsize())); - try { ri.addi(key, i); } catch (final IOException e) {} // no IOException can happen here + try { ri.putUnique(key, i); } catch (final IOException e) {} // no IOException can happen here if ((i % 10000) == 0) { 
System.out.print('.'); System.out.flush(); @@ -177,7 +177,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { public synchronized Row.Entry get(final byte[] key) throws IOException { if (index == null) return null; // case may happen during shutdown - final int pos = index.geti(key); + final int pos = index.get(key); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); if (pos < 0) return null; // pos may be greater than this.size(), because this table may have deleted entries @@ -211,7 +211,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { while (i.hasNext()) { row = i.next(); key = row.getColBytes(0); - pos = index.geti(key); + pos = index.get(key); if (pos < 0) { new_rows_sequential.add(row); } else { @@ -237,10 +237,10 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { assert row.objectsize() <= this.rowdef.objectsize; final byte[] key = row.getColBytes(0); if (index == null) return null; // case may appear during shutdown - int pos = index.geti(key); + int pos = index.get(key); if (pos < 0) { pos = super.add(row); - index.puti(key, pos); + index.put(key, pos); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); return null; } @@ -250,11 +250,11 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { if (oldentry == null) { Log.logSevere("kelondroFlexTable", "put(): index failure; the index pointed to a cell which is empty. content.size() = " + this.size() + ", index.size() = " + index.size()); // patch bug ***** FIND CAUSE! 
(see also: remove) - final int oldindex = index.removei(key); + final int oldindex = index.remove(key); assert oldindex >= 0; - assert index.geti(key) == -1; + assert index.get(key) == -1; // here is this.size() > index.size() because of remove operation above - index.puti(key, super.add(row)); + index.put(key, super.add(row)); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); return null; } @@ -268,7 +268,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { public synchronized void addUnique(final Row.Entry row) throws IOException { assert row.objectsize() == this.rowdef.objectsize; assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - index.addi(row.getColBytes(0), super.add(row)); + index.putUnique(row.getColBytes(0), super.add(row)); } public synchronized void addUniqueMultiple(final List rows) throws IOException { @@ -281,7 +281,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { Map.Entry entry; while (i.hasNext()) { entry = i.next(); - index.puti(entry.getValue(), entry.getKey().intValue()); + index.put(entry.getValue(), entry.getKey().intValue()); } assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); } @@ -310,8 +310,8 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { public synchronized Row.Entry remove(final byte[] key) throws IOException { // the underlying data structure is a file, where the order cannot be maintained. Gaps are filled with new values. 
- final int i = index.removei(key); - assert (index.geti(key) < 0); // must be deleted + final int i = index.remove(key); + assert (index.get(key) < 0); // must be deleted if (i < 0) { assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); return null; @@ -332,7 +332,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex { } public synchronized Row.Entry removeOne() throws IOException { - final int i = index.removeonei(); + final int i = index.removeone(); if (i < 0) return null; Row.Entry r; r = super.get(i); diff --git a/source/de/anomic/kelondro/text/IndexCollection.java b/source/de/anomic/kelondro/text/IndexCollection.java index a16659919..e3d585905 100644 --- a/source/de/anomic/kelondro/text/IndexCollection.java +++ b/source/de/anomic/kelondro/text/IndexCollection.java @@ -39,6 +39,7 @@ import java.util.Random; import java.util.Set; import java.util.TimeZone; +import de.anomic.kelondro.index.IntegerHandleIndex; import de.anomic.kelondro.index.ObjectIndex; import de.anomic.kelondro.index.Row; import de.anomic.kelondro.index.RowCollection; @@ -68,15 +69,15 @@ public class IndexCollection implements Index { private static final int EcoFSBufferSize = 1000; private static final int errorLimit = 500; // if the index exceeds this number of errors, it is re-built next time the application starts - private ObjectIndex index; - private final int keylength; - private final File path; - private final String filenameStub; - private final File commonsPath; + private ObjectIndex index; + private final int keylength; + private final File path; + private final String filenameStub; + private final File commonsPath; + private final Row payloadrow; // definition of the payload (chunks inside the collections) + private final int maxPartitions; // this is the maxmimum number of array files + private int indexErrors; // counter for exceptions when index returned wrong value private Map arrays; // Map of 
(partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects - private final Row payloadrow; // definition of the payload (chunks inside the collections) - private final int maxPartitions; // this is the maxmimum number of array files - private int indexErrors; // counter for exceptions when index returned wrong value private static final int idx_col_key = 0; // the index private static final int idx_col_chunksize = 1; // chunksize (number of bytes in a single chunk, needed for migration option) @@ -93,16 +94,16 @@ public class IndexCollection implements Index { final int keyLength, final ByteOrder indexOrder, final int maxpartitions, - final Row rowdef, + final Row payloadrow, boolean useCommons) throws IOException { // the buffersize is number of bytes that are only used if the kelondroFlexTable is backed up with a kelondroTree indexErrors = 0; this.path = path; this.filenameStub = filenameStub; this.keylength = keyLength; - this.payloadrow = rowdef; + this.payloadrow = payloadrow; this.maxPartitions = maxpartitions; - File cop = new File(path, filenameStub + "." + fillZ(Integer.toHexString(rowdef.objectsize).toUpperCase(), 4) + ".commons"); + File cop = new File(path, filenameStub + "." + fillZ(Integer.toHexString(payloadrow.objectsize).toUpperCase(), 4) + ".commons"); this.commonsPath = (useCommons) ? 
cop : null; if (this.commonsPath == null) { FileUtils.deleteDirectory(cop); @@ -118,7 +119,7 @@ public class IndexCollection implements Index { // open index and array files this.arrays = new HashMap(); // all entries will be dynamically created with getArray() - index = openIndexFile(path, filenameStub, indexOrder, loadfactor, rowdef, 0); + index = openIndexFile(path, this.keylength, filenameStub, indexOrder, loadfactor, payloadrow, 0); openAllArrayFiles(false, indexOrder); } else { // calculate initialSpace @@ -132,7 +133,7 @@ public class IndexCollection implements Index { final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16); final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16); try { - array = openArrayFile(partitionNumber, serialNumber, indexOrder, true); + array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true); initialSpace += array.size(); array.close(); } catch (final IOException e) { @@ -317,7 +318,7 @@ public class IndexCollection implements Index { final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16); final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16); try { - array = openArrayFile(partitionNumber, serialNumber, indexOrder, true); + array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true); } catch (final IOException e) { e.printStackTrace(); continue; @@ -390,7 +391,67 @@ public class IndexCollection implements Index { if (doublecount > 0) Log.logWarning("STARTUP", "found " + doublecount + " RWI entries with references to several collections. 
All have been fixed (zombies still exists)."); } - private ObjectIndex openIndexFile(final File path, final String filenameStub, final ByteOrder indexOrder, + /** + * enumerate all index files and return a set of reference hashes + * @param path + * @param filenameStub + * @param keylength + * @param indexOrder + * @param payloadrow + * @return + * @throws IOException + */ + public static IntegerHandleIndex referenceHashes( + final File path, + final String filenameStub, + final int keylength, + final ByteOrder indexOrder, + final Row payloadrow) throws IOException { + + final String[] list = path.list(); + FixedWidthArray array; + IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000); + for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) { + // open array + final int pos = list[i].indexOf('.'); + if (pos < 0) continue; + final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16); + final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16); + try { + array = openArrayFile(path, filenameStub, keylength, partitionNumber, serialNumber, indexOrder, payloadrow.objectsize, true); + } catch (final IOException e) { + e.printStackTrace(); + continue; + } + + // loop over all elements in array and collect reference hashes + Row.EntryIndex arrayrow; + final Iterator ei = array.contentRows(-1); + final long start = System.currentTimeMillis(); + long lastlog = start; + int count = 0; + while (ei.hasNext()) { + arrayrow = ei.next(); + if (arrayrow == null) continue; + final RowSet collection = new RowSet(payloadrow, arrayrow); + final int chunkcountInArray = collection.size(); + for (int j = 0; j < chunkcountInArray; j++) { + references.inc(collection.get(j, false).getColBytes(0)); + } + count++; + // write a log + if (System.currentTimeMillis() - lastlog > 30000) { + Log.logFine("COLLECTION INDEX STARTUP", "scanned " + count + " RWI index entries. 
" + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array"); + lastlog = System.currentTimeMillis(); + } + } + + } + return references; + } + + private static ObjectIndex openIndexFile( + final File path, int keylength, final String filenameStub, final ByteOrder indexOrder, final int loadfactor, final Row rowdef, final int initialSpace) throws IOException { // open/create index table final File f = new File(path, filenameStub + ".index"); @@ -413,12 +474,14 @@ public class IndexCollection implements Index { return theindex; } - private FixedWidthArray openArrayFile(final int partitionNumber, final int serialNumber, final ByteOrder indexOrder, final boolean create) throws IOException { - final File f = arrayFile(path, filenameStub, loadfactor, payloadrow.objectsize, partitionNumber, serialNumber); + private static FixedWidthArray openArrayFile( + File path, String filenameStub, int keylength, + final int partitionNumber, final int serialNumber, final ByteOrder indexOrder, int objectsize, final boolean create) throws IOException { + final File f = arrayFile(path, filenameStub, loadfactor, objectsize, partitionNumber, serialNumber); final int load = arrayCapacity(partitionNumber); final Row rowdef = new Row( "byte[] key-" + keylength + "," + - "byte[] collection-" + (RowCollection.exportOverheadSize + load * this.payloadrow.objectsize), + "byte[] collection-" + (RowCollection.exportOverheadSize + load * objectsize), indexOrder, 0 ); @@ -433,7 +496,7 @@ public class IndexCollection implements Index { FixedWidthArray array = arrays.get(accessKey); if (array != null) return array; try { - array = openArrayFile(partitionNumber, serialNumber, indexOrder, true); + array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true); } catch (final IOException e) { e.printStackTrace(); return null; @@ -442,14 +505,14 @@ 
public class IndexCollection implements Index { return array; } - private int arrayCapacity(final int arrayCounter) { + private static int arrayCapacity(final int arrayCounter) { if (arrayCounter < 0) return 0; int load = loadfactor; for (int i = 0; i < arrayCounter; i++) load = load * loadfactor; return load; } - private int arrayIndex(final int requestedCapacity) throws kelondroOutOfLimitsException{ + private static int arrayIndex(final int requestedCapacity) throws kelondroOutOfLimitsException{ // the requestedCapacity is the number of wanted chunks int load = 1, i = 0; while (true) { diff --git a/source/de/anomic/kelondro/util/FileUtils.java b/source/de/anomic/kelondro/util/FileUtils.java index aca6c093e..4cd9da7cb 100644 --- a/source/de/anomic/kelondro/util/FileUtils.java +++ b/source/de/anomic/kelondro/util/FileUtils.java @@ -280,6 +280,7 @@ public final class FileUtils { while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; } finally { if (fis != null) try { fis.close(); } catch (final Exception e) {} + fis = null; } return buffer; } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0a1cb14c2..d7599ca5b 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1915,7 +1915,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch this.webIndex.seedDB.scheme.verticalPartitions() * 4) { log.logInfo("dhtTransferJob: no selection, too many entries in transmission cloud: " + this.dhtDispatcher.cloudSize()); - } else if (MemoryControl.available() < 1024*1024*20) { + } else if (MemoryControl.available() < 1024*1024*25) { log.logInfo("dhtTransferJob: no selection, too less memory available : " + (MemoryControl.available() / 1024 / 1024) + " MB"); } else { String startHash = PeerSelection.selectTransferStart();