- refactoring of IntegerHandleIndex and LongHandleIndex (better method names)

- fix for problem in httpdFileHandler: missing close of open Files if template cache was disabled
- more memory for DHT selection required
- stub for URL reference hash statistics in index collections

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5682 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 9b6fac4a82
commit b80db04667

@ -380,7 +380,13 @@ public class URLAnalysis {
System.out.println("finished");
}
/*
public static void used(String collectionPath, String statisticPath) {
File collections = new File(collectionPath);
File out = new File(statisticPath);
IntegerHandleIndex idx = IndexCollection.referenceHashes(collections, filenameStub, keylength, indexOrder, payloadrow)
}
*/
public static void main(String[] args) {
// example: java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -stat DATA/EXPORT/urls1.txt.gz
if (args[0].equals("-stat") && args.length >= 2) {
@ -388,7 +394,9 @@ public class URLAnalysis {
} else if (args[0].equals("-host") && args.length >= 2) {
for (int i = 1; i < args.length; i++) genhost(args[i]);
} else if (args[0].equals("-sort") && args.length >= 2) {
for (int i = 1; i < args.length; i++) sortsplit(args[i]);
for (int i = 1; i < args.length; i++) sortsplit(args[i]);
//} else if (args[0].equals("-incollection") && args.length >= 2) {
// used(args[1], args[2]);
} else {
System.out.println("usage:");
System.out.println("-stat <file> generate a statistics about common words in file, store to <file>.stat");

@ -86,6 +86,7 @@ import de.anomic.kelondro.util.ByteBuffer;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.kelondro.util.Log;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
@ -812,45 +813,43 @@ public final class httpdFileHandler {
// read the file/template
TemplateCacheEntry templateCacheEntry = null;
if (useTemplateCache) {
final long fileSize = targetFile.length();
if (fileSize <= 512 * 1024) {
// read from cache
SoftReference<TemplateCacheEntry> ref = templateCache.get(targetFile);
if (ref != null) {
templateCacheEntry = ref.get();
if (templateCacheEntry == null) templateCache.remove(targetFile);
}
long fileSize = targetFile.length();
if (useTemplateCache && fileSize <= 512 * 1024) {
// read from cache
SoftReference<TemplateCacheEntry> ref = templateCache.get(targetFile);
if (ref != null) {
templateCacheEntry = ref.get();
if (templateCacheEntry == null) templateCache.remove(targetFile);
}
Date targetFileDate = new Date(targetFile.lastModified());
if (templateCacheEntry == null || targetFileDate.after(templateCacheEntry.lastModified)) {
// loading the content of the template file into
// a byte array
templateCacheEntry = new TemplateCacheEntry();
templateCacheEntry.lastModified = targetFileDate;
templateCacheEntry.content = FileUtils.read(targetFile);
Date targetFileDate = new Date(targetFile.lastModified());
if (templateCacheEntry == null || targetFileDate.after(templateCacheEntry.lastModified)) {
// loading the content of the template file into
// a byte array
templateCacheEntry = new TemplateCacheEntry();
templateCacheEntry.lastModified = targetFileDate;
templateCacheEntry.content = FileUtils.read(targetFile);
// storing the content into the cache
ref = new SoftReference<TemplateCacheEntry>(templateCacheEntry);
templateCache.put(targetFile, ref);
if (theLogger.isFinest()) theLogger.logFinest("Cache MISS for file " + targetFile);
} else {
if (theLogger.isFinest()) theLogger.logFinest("Cache HIT for file " + targetFile);
}
// creating an inputstream needed by the template
// rewrite function
fis = new ByteArrayInputStream(templateCacheEntry.content);
templateCacheEntry = null;
// storing the content into the cache
ref = new SoftReference<TemplateCacheEntry>(templateCacheEntry);
templateCache.put(targetFile, ref);
if (theLogger.isFinest()) theLogger.logFinest("Cache MISS for file " + targetFile);
} else {
// read from file directly
fis = new BufferedInputStream(new FileInputStream(targetFile));
if (theLogger.isFinest()) theLogger.logFinest("Cache HIT for file " + targetFile);
}
// creating an inputstream needed by the template
// rewrite function
fis = new ByteArrayInputStream(templateCacheEntry.content);
templateCacheEntry = null;
} else if (fileSize <= Math.min(4 * 1024 * 1204, MemoryControl.available() / 100)) {
// read file completely into ram, avoid that too many files are open at the same time
fis = new ByteArrayInputStream(FileUtils.read(targetFile));
} else {
fis = new BufferedInputStream(new FileInputStream(targetFile));
}
if(mimeType.startsWith("text")) {
if (mimeType.startsWith("text")) {
// every text-file distributed by yacy is UTF-8
if(!path.startsWith("/repository")) {
mimeType = mimeType + "; charset=UTF-8";
@ -880,6 +879,7 @@ public final class httpdFileHandler {
final ByteBuffer o = new ByteBuffer();
// apply templates
httpTemplate.writeTemplate(fis, o, templatePatterns, "-UNRESOLVED_PATTERN-".getBytes("UTF-8"));
fis.close();
httpd.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, -1, targetDate, null, (templatePatterns == null) ? new httpResponseHeader() : templatePatterns.getOutgoingHeader(), null, "chunked", nocache);
// send the content in chunked parts, see RFC 2616 section 3.6.1
final httpChunkedOutputStream chos = new httpChunkedOutputStream(out);
@ -892,7 +892,7 @@ public final class httpdFileHandler {
// apply templates
final ByteBuffer o1 = new ByteBuffer();
httpTemplate.writeTemplate(fis, o1, templatePatterns, "-UNRESOLVED_PATTERN-".getBytes("UTF-8"));
fis.close();
final ByteBuffer o = new ByteBuffer();
if (zipContent) {

@ -139,7 +139,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
file.writeInt(key.length + blob.length);
file.write(key);
file.write(blob, 0, blob.length);
index.putl(key, pos);
index.put(key, pos);
}
/**
@ -167,7 +167,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
entry = i.next();
key = entry.getKey().getBytes();
blob = entry.getValue();
index.putl(key, posFile);
index.put(key, posFile);
b = AbstractRandomAccess.int2array(key.length + blob.length);
assert b.length == 4;
assert posBuffer + 4 < ba.length : "posBuffer = " + posBuffer + ", ba.length = " + ba.length;
@ -307,7 +307,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
file.write(b);
// add the entry to the index
this.index.putl(key, entry.getKey());
this.index.put(key, entry.getKey());
// remove the entry from the free list
i.remove();
@ -338,7 +338,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
file.write(b);
// add the index to the new entry
index.putl(key, lseek);
index.put(key, lseek);
// define the new empty entry
final int newfreereclen = lsize - reclen - 4;

@ -146,7 +146,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB {
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
// check if the index contains the key
final long seek = index.getl(key);
final long seek = index.get(key);
if (seek < 0) return;
// access the file and read the container
@ -168,7 +168,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB {
this.file.write(fill, 0, size);
// remove entry from index
this.index.removel(key);
this.index.remove(key);
// recursively merge gaps
tryMergeNextGaps(seek, size);
@ -281,7 +281,7 @@ public class BLOBHeapModifier extends HeapReader implements BLOB {
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
// check if the index contains the key
final long pos = index.getl(key);
final long pos = index.get(key);
if (pos < 0) return 0;
// access the file and read the container

@ -73,7 +73,7 @@ public class HeapReader {
boolean ok = true;
while (i.hasNext() && c-- > 0) {
b = i.next();
pos = this.index.getl(b);
pos = this.index.get(b);
file.seek(pos + 4);
file.readFully(b1, 0, b1.length);
if (this.ordering.compare(b, b1) != 0) {
@ -210,7 +210,7 @@ public class HeapReader {
// check if the file index contains the key
try {
return index.getl(key) >= 0;
return index.get(key) >= 0;
} catch (final IOException e) {
e.printStackTrace();
return false;
@ -231,7 +231,7 @@ public class HeapReader {
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
// check if the index contains the key
final long pos = index.getl(key);
final long pos = index.get(key);
if (pos < 0) return null;
// access the file and read the container
@ -270,7 +270,7 @@ public class HeapReader {
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
// check if the index contains the key
final long pos = index.getl(key);
final long pos = index.get(key);
if (pos < 0) return -1;
// access the file and read the size of the container

@ -87,13 +87,13 @@ public final class HeapWriter {
assert blob.length > 0;
assert key.length == this.keylength;
assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;
assert index.getl(key) < 0; // must not occur before
assert index.get(key) < 0; // must not occur before
if ((blob == null) || (blob.length == 0)) return;
int chunkl = key.length + blob.length;
os.writeInt(chunkl);
os.write(key);
os.write(blob);
index.addl(key, seek);
index.putUnique(key, seek);
this.seek += chunkl + 4;
}

@ -24,8 +24,16 @@
package de.anomic.kelondro.index;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
@ -46,6 +54,51 @@ public class IntegerHandleIndex {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder, 0);
this.index = new ObjectIndexCache(rowdef, space);
}
/**
 * initialize an IntegerHandleIndex with the content of a dumped index
 * @param keylength length of the record keys
 * @param objectOrder byte order of the keys
 * @param file a dump file as written by {@link #dump(java.io.File)}
 * @throws IOException if the dump cannot be read or contains a truncated record
 */
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file) throws IOException {
    this(keylength, objectOrder, (int) (file.length() / (keylength + 8)));
    // read the index dump and fill the index
    InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
    try {
        byte[] a = new byte[keylength + 8];
        int c, off;
        while (true) {
            // InputStream.read may return fewer bytes than requested;
            // loop until one full record is assembled or EOF is reached
            off = 0;
            while (off < a.length) {
                c = is.read(a, off, a.length - off);
                if (c < 0) break;
                off += c;
            }
            if (off == 0) break; // clean end of file
            if (off < a.length) throw new IOException("truncated record in dump file " + file);
            this.index.addUnique(this.rowdef.newEntry(a));
        }
    } finally {
        // close also on error so the file handle does not leak
        is.close();
    }
    assert this.index.size() == file.length() / (keylength + 8);
}
/**
 * write a dump of the index to a file. All entries are written in key order
 * which makes it possible to read them again in a fast way
 * @param file the target file; an existing file is overwritten
 * @return the number of written entries
 * @throws IOException if the file cannot be written
 */
public int dump(File file) throws IOException {
    // we must use an iterator from the combined index, because we need the entries sorted
    // otherwise we could just write the byte[] from the in kelondroRowSet which would make
    // everything much faster, but this is not an option here.
    Iterator<Row.Entry> i = this.index.rows(true, null);
    OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024);
    int c = 0;
    try {
        while (i.hasNext()) {
            os.write(i.next().bytes());
            c++;
        }
        os.flush();
    } finally {
        // close also when a write fails so the file handle does not leak
        os.close();
    }
    return c;
}
public Row row() {
return index.row();
@ -60,14 +113,14 @@ public class IntegerHandleIndex {
return index.has(key);
}
public synchronized int geti(final byte[] key) throws IOException {
/** look up the handle stored for the given key
 * @return the stored int handle, or -1 if the key is not contained in the index */
public synchronized int get(final byte[] key) throws IOException {
    assert key != null;
    final Row.Entry entry = index.get(key);
    return (entry == null) ? -1 : (int) entry.getColLong(1);
}
public synchronized int puti(final byte[] key, final int i) throws IOException {
public synchronized int put(final byte[] key, final int i) throws IOException {
assert i >= 0 : "i = " + i;
assert (key != null);
final Row.Entry newentry = index.row().newEntry();
@ -78,7 +131,34 @@ public class IntegerHandleIndex {
return (int) oldentry.getColLong(1);
}
public synchronized void addi(final byte[] key, final int i) throws IOException {
/**
 * add a delta to the handle stored for the given key; if the key is not
 * yet contained in the index, a new entry holding the delta is created.
 * @param key the record key (must not be null)
 * @param a the delta; must not be 0 (adding 0 is a no-op and only costs performance)
 * @return the counter value after the addition
 * @throws IOException
 */
public synchronized int add(final byte[] key, int a) throws IOException {
    assert key != null;
    // was 'assert a > 0', which contradicts dec() calling add(key, -1);
    // negative deltas must be allowed, only 0 makes no sense
    assert a != 0;
    final Row.Entry indexentry = index.get(key);
    if (indexentry == null) {
        // key is new: create an entry with the delta as initial value
        final Row.Entry newentry = this.rowdef.newEntry();
        newentry.setCol(0, key);
        newentry.setCol(1, a);
        index.addUnique(newentry);
        return a; // was 'return 1', which is wrong for any delta != 1
    } else {
        int i = (int) indexentry.getColLong(1) + a;
        indexentry.setCol(1, i);
        index.put(indexentry);
        return i;
    }
}
/** increase the handle value stored for the given key by one
 * @return the value after the increment */
public synchronized int inc(final byte[] key) throws IOException {
return add(key, 1);
}
/** decrease the handle value stored for the given key by one
 * (delegates to add with a negative delta)
 * @return the value after the decrement */
public synchronized int dec(final byte[] key) throws IOException {
return add(key, -1);
}
public synchronized void putUnique(final byte[] key, final int i) throws IOException {
assert i >= 0 : "i = " + i;
assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry();
@ -105,14 +185,14 @@ public class IntegerHandleIndex {
return report;
}
public synchronized int removei(final byte[] key) throws IOException {
/** delete the entry for the given key
 * @return the int handle that was stored for the key, or -1 if the key was not present */
public synchronized int remove(final byte[] key) throws IOException {
    assert key != null;
    final Row.Entry removed = index.remove(key);
    if (removed != null) return (int) removed.getColLong(1);
    return -1;
}
public synchronized int removeonei() throws IOException {
public synchronized int removeone() throws IOException {
final Row.Entry indexentry = index.removeOne();
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
@ -222,7 +302,7 @@ public class IntegerHandleIndex {
try {
entry c;
while ((c = cache.take()) != poisonEntry) {
map.addi(c.key, c.l);
map.putUnique(c.key, c.l);
}
} catch (InterruptedException e) {
e.printStackTrace();

@ -116,14 +116,14 @@ public class LongHandleIndex {
index.clear();
}
public synchronized long getl(final byte[] key) throws IOException {
/** look up the handle stored for the given key
 * @return the stored long handle, or -1 if the key is not contained in the index */
public synchronized long get(final byte[] key) throws IOException {
    assert key != null;
    final Row.Entry entry = index.get(key);
    return (entry == null) ? -1 : entry.getColLong(1);
}
public synchronized long putl(final byte[] key, final long l) throws IOException {
public synchronized long put(final byte[] key, final long l) throws IOException {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = index.row().newEntry();
@ -134,7 +134,7 @@ public class LongHandleIndex {
return oldentry.getColLong(1);
}
public synchronized void addl(final byte[] key, final long l) throws IOException {
public synchronized void putUnique(final byte[] key, final long l) throws IOException {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry();
@ -143,6 +143,33 @@ public class LongHandleIndex {
index.addUnique(newentry);
}
/**
 * add a delta to the handle stored for the given key; if the key is not
 * yet contained in the index, a new entry holding the delta is created.
 * @param key the record key (must not be null)
 * @param a the delta; must not be 0 (adding 0 is a no-op and only costs performance)
 * @return the counter value after the addition
 * @throws IOException
 */
public synchronized long add(final byte[] key, long a) throws IOException {
    assert key != null;
    // was 'assert a > 0', which contradicts dec() calling add(key, -1);
    // negative deltas must be allowed, only 0 makes no sense
    assert a != 0;
    final Row.Entry indexentry = index.get(key);
    if (indexentry == null) {
        // key is new: create an entry with the delta as initial value
        final Row.Entry newentry = this.rowdef.newEntry();
        newentry.setCol(0, key);
        newentry.setCol(1, a);
        index.addUnique(newentry);
        return a; // was 'return 1', which is wrong for any delta != 1
    } else {
        long i = indexentry.getColLong(1) + a;
        indexentry.setCol(1, i);
        index.put(indexentry);
        return i;
    }
}
/** increase the handle value stored for the given key by one
 * @return the value after the increment */
public synchronized long inc(final byte[] key) throws IOException {
return add(key, 1);
}
/** decrease the handle value stored for the given key by one
 * (delegates to add with a negative delta)
 * @return the value after the decrement */
public synchronized long dec(final byte[] key) throws IOException {
return add(key, -1);
}
public synchronized ArrayList<Long[]> removeDoubles() throws IOException {
final ArrayList<RowCollection> indexreport = index.removeDoubles();
final ArrayList<Long[]> report = new ArrayList<Long[]>();
@ -159,14 +186,14 @@ public class LongHandleIndex {
return report;
}
public synchronized long removel(final byte[] key) throws IOException {
/** delete the entry for the given key
 * @return the long handle that was stored for the key, or -1 if the key was not present */
public synchronized long remove(final byte[] key) throws IOException {
    assert key != null;
    final Row.Entry removed = index.remove(key);
    if (removed != null) return removed.getColLong(1);
    return -1;
}
public synchronized long removeonel() throws IOException {
public synchronized long removeone() throws IOException {
final Row.Entry indexentry = index.removeOne();
if (indexentry == null) return -1;
return indexentry.getColLong(1);
@ -273,7 +300,7 @@ public class LongHandleIndex {
try {
entry c;
while ((c = cache.take()) != poisonEntry) {
map.addl(c.key, c.l);
map.putUnique(c.key, c.l);
}
} catch (InterruptedException e) {
e.printStackTrace();

@ -146,7 +146,7 @@ public class EcoTable implements ObjectIndex {
// write the key into the index table
assert key != null;
if (key == null) {i++; continue;}
index.addi(key, i++);
index.putUnique(key, i++);
}
} else {
byte[] record;
@ -159,7 +159,7 @@ public class EcoTable implements ObjectIndex {
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
// write the key into the index table
index.addi(key, i++);
index.putUnique(key, i++);
// write the tail into the table
table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true));
@ -187,7 +187,7 @@ public class EcoTable implements ObjectIndex {
for (final Integer[] ds: doubles) {
file.get(ds[0].intValue(), record, 0);
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
index.addi(key, ds[0].intValue());
index.putUnique(key, ds[0].intValue());
}
// then remove the other doubles by removing them from the table, but do a re-indexing while doing that
// first aggregate all the delete positions because the elements from the top positions must be removed first
@ -264,7 +264,7 @@ public class EcoTable implements ObjectIndex {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
final int i = (int) file.size();
index.addi(row.getPrimaryKeyBytes(), i);
index.putUnique(row.getPrimaryKeyBytes(), i);
if (table != null) {
assert table.size() == i;
table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
@ -347,7 +347,7 @@ public class EcoTable implements ObjectIndex {
if ((file == null) || (index == null)) return null;
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail;
assert ((table == null) || (table.size() == index.size()));
final int i = index.geti(key);
final int i = index.get(key);
if (i == -1) return null;
final byte[] b = new byte[rowdef.objectsize];
if (table == null) {
@ -388,7 +388,7 @@ public class EcoTable implements ObjectIndex {
assert row != null;
assert row.bytes() != null;
if ((row == null) || (row.bytes() == null)) return null;
final int i = index.geti(row.getPrimaryKeyBytes());
final int i = index.get(row.getPrimaryKeyBytes());
if (i == -1) {
addUnique(row);
return null;
@ -441,7 +441,7 @@ public class EcoTable implements ObjectIndex {
file.put(i, p, 0);
final byte[] k = new byte[rowdef.primaryKeyLength];
System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
index.puti(k, i);
index.put(k, i);
}
} else {
if (i == index.size() - 1) {
@ -456,7 +456,7 @@ public class EcoTable implements ObjectIndex {
file.cleanLast(p, 0);
file.put(i, p, 0);
final Row.Entry lr = rowdef.newEntry(p);
index.puti(lr.getPrimaryKeyBytes(), i);
index.put(lr.getPrimaryKeyBytes(), i);
}
}
}
@ -465,7 +465,7 @@ public class EcoTable implements ObjectIndex {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
assert key.length == rowdef.primaryKeyLength;
final int i = index.geti(key);
final int i = index.get(key);
if (i == -1) return null; // nothing to do
// prepare result
@ -476,19 +476,19 @@ public class EcoTable implements ObjectIndex {
assert i < index.size();
if (table == null) {
if (i == index.size() - 1) {
ix = index.removei(key);
ix = index.remove(key);
assert ix == i;
file.cleanLast(b, 0);
} else {
assert i < index.size() - 1;
ix = index.removei(key);
ix = index.remove(key);
assert ix == i;
file.get(i, b, 0);
file.cleanLast(p, 0);
file.put(i, p, 0);
final byte[] k = new byte[rowdef.primaryKeyLength];
System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
index.puti(k, i);
index.put(k, i);
}
assert (file.size() == index.size() + fail);
} else {
@ -499,13 +499,13 @@ public class EcoTable implements ObjectIndex {
if (i == index.size() - 1) {
// special handling if the entry is the last entry in the file
ix = index.removei(key);
ix = index.remove(key);
assert ix == i;
table.removeRow(i, false);
file.cleanLast();
} else {
// switch values
ix = index.removei(key);
ix = index.remove(key);
assert ix == i;
final Row.Entry te = table.removeOne();
@ -514,7 +514,7 @@ public class EcoTable implements ObjectIndex {
file.cleanLast(p, 0);
file.put(i, p, 0);
final Row.Entry lr = rowdef.newEntry(p);
index.puti(lr.getPrimaryKeyBytes(), i);
index.put(lr.getPrimaryKeyBytes(), i);
}
assert (file.size() == index.size() + fail);
assert (table.size() == index.size()) : "table.size() = " + table.size() + ", index.size() = " + index.size();
@ -531,7 +531,7 @@ public class EcoTable implements ObjectIndex {
final byte[] le = new byte[rowdef.objectsize];
file.cleanLast(le, 0);
final Row.Entry lr = rowdef.newEntry(le);
final int i = index.removei(lr.getPrimaryKeyBytes());
final int i = index.remove(lr.getPrimaryKeyBytes());
assert i >= 0;
if (table != null) table.removeOne();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
@ -645,7 +645,7 @@ public class EcoTable implements ObjectIndex {
assert k != null;
if (k == null) return null;
try {
this.c = index.geti(k);
this.c = index.get(k);
} catch (final IOException e) {
e.printStackTrace();
return null;

@ -164,7 +164,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
}
assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator
//System.out.println("ENTRY: " + serverLog.arrayList(indexentry.bytes(), 0, indexentry.objectsize()));
try { ri.addi(key, i); } catch (final IOException e) {} // no IOException can happen here
try { ri.putUnique(key, i); } catch (final IOException e) {} // no IOException can happen here
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
@ -177,7 +177,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
public synchronized Row.Entry get(final byte[] key) throws IOException {
if (index == null) return null; // case may happen during shutdown
final int pos = index.geti(key);
final int pos = index.get(key);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
if (pos < 0) return null;
// pos may be greater than this.size(), because this table may have deleted entries
@ -211,7 +211,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
while (i.hasNext()) {
row = i.next();
key = row.getColBytes(0);
pos = index.geti(key);
pos = index.get(key);
if (pos < 0) {
new_rows_sequential.add(row);
} else {
@ -237,10 +237,10 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
assert row.objectsize() <= this.rowdef.objectsize;
final byte[] key = row.getColBytes(0);
if (index == null) return null; // case may appear during shutdown
int pos = index.geti(key);
int pos = index.get(key);
if (pos < 0) {
pos = super.add(row);
index.puti(key, pos);
index.put(key, pos);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return null;
}
@ -250,11 +250,11 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
if (oldentry == null) {
Log.logSevere("kelondroFlexTable", "put(): index failure; the index pointed to a cell which is empty. content.size() = " + this.size() + ", index.size() = " + index.size());
// patch bug ***** FIND CAUSE! (see also: remove)
final int oldindex = index.removei(key);
final int oldindex = index.remove(key);
assert oldindex >= 0;
assert index.geti(key) == -1;
assert index.get(key) == -1;
// here is this.size() > index.size() because of remove operation above
index.puti(key, super.add(row));
index.put(key, super.add(row));
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return null;
}
@ -268,7 +268,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
public synchronized void addUnique(final Row.Entry row) throws IOException {
assert row.objectsize() == this.rowdef.objectsize;
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
index.addi(row.getColBytes(0), super.add(row));
index.putUnique(row.getColBytes(0), super.add(row));
}
public synchronized void addUniqueMultiple(final List<Row.Entry> rows) throws IOException {
@ -281,7 +281,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
Map.Entry<Integer, byte[]> entry;
while (i.hasNext()) {
entry = i.next();
index.puti(entry.getValue(), entry.getKey().intValue());
index.put(entry.getValue(), entry.getKey().intValue());
}
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
}
@ -310,8 +310,8 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
public synchronized Row.Entry remove(final byte[] key) throws IOException {
// the underlying data structure is a file, where the order cannot be maintained. Gaps are filled with new values.
final int i = index.removei(key);
assert (index.geti(key) < 0); // must be deleted
final int i = index.remove(key);
assert (index.get(key) < 0); // must be deleted
if (i < 0) {
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return null;
@ -332,7 +332,7 @@ public class FlexTable extends FlexWidthArray implements ObjectIndex {
}
public synchronized Row.Entry removeOne() throws IOException {
final int i = index.removeonei();
final int i = index.removeone();
if (i < 0) return null;
Row.Entry r;
r = super.get(i);

@ -39,6 +39,7 @@ import java.util.Random;
import java.util.Set;
import java.util.TimeZone;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.ObjectIndex;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowCollection;
@ -68,15 +69,15 @@ public class IndexCollection implements Index {
private static final int EcoFSBufferSize = 1000;
private static final int errorLimit = 500; // if the index exceeds this number of errors, it is re-built next time the application starts
private ObjectIndex index;
private final int keylength;
private final File path;
private final String filenameStub;
private final File commonsPath;
private ObjectIndex index;
private final int keylength;
private final File path;
private final String filenameStub;
private final File commonsPath;
private final Row payloadrow; // definition of the payload (chunks inside the collections)
private final int maxPartitions; // this is the maxmimum number of array files
private int indexErrors; // counter for exceptions when index returned wrong value
private Map<String, FixedWidthArray> arrays; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects
private final Row payloadrow; // definition of the payload (chunks inside the collections)
private final int maxPartitions; // this is the maxmimum number of array files
private int indexErrors; // counter for exceptions when index returned wrong value
private static final int idx_col_key = 0; // the index
private static final int idx_col_chunksize = 1; // chunksize (number of bytes in a single chunk, needed for migration option)
@ -93,16 +94,16 @@ public class IndexCollection implements Index {
final int keyLength,
final ByteOrder indexOrder,
final int maxpartitions,
final Row rowdef,
final Row payloadrow,
boolean useCommons) throws IOException {
// the buffersize is number of bytes that are only used if the kelondroFlexTable is backed up with a kelondroTree
indexErrors = 0;
this.path = path;
this.filenameStub = filenameStub;
this.keylength = keyLength;
this.payloadrow = rowdef;
this.payloadrow = payloadrow;
this.maxPartitions = maxpartitions;
File cop = new File(path, filenameStub + "." + fillZ(Integer.toHexString(rowdef.objectsize).toUpperCase(), 4) + ".commons");
File cop = new File(path, filenameStub + "." + fillZ(Integer.toHexString(payloadrow.objectsize).toUpperCase(), 4) + ".commons");
this.commonsPath = (useCommons) ? cop : null;
if (this.commonsPath == null) {
FileUtils.deleteDirectory(cop);
@ -118,7 +119,7 @@ public class IndexCollection implements Index {
// open index and array files
this.arrays = new HashMap<String, FixedWidthArray>(); // all entries will be dynamically created with getArray()
index = openIndexFile(path, filenameStub, indexOrder, loadfactor, rowdef, 0);
index = openIndexFile(path, this.keylength, filenameStub, indexOrder, loadfactor, payloadrow, 0);
openAllArrayFiles(false, indexOrder);
} else {
// calculate initialSpace
@ -132,7 +133,7 @@ public class IndexCollection implements Index {
final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16);
final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16);
try {
array = openArrayFile(partitionNumber, serialNumber, indexOrder, true);
array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true);
initialSpace += array.size();
array.close();
} catch (final IOException e) {
@ -317,7 +318,7 @@ public class IndexCollection implements Index {
final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16);
final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16);
try {
array = openArrayFile(partitionNumber, serialNumber, indexOrder, true);
array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true);
} catch (final IOException e) {
e.printStackTrace();
continue;
@ -390,7 +391,67 @@ public class IndexCollection implements Index {
if (doublecount > 0) Log.logWarning("STARTUP", "found " + doublecount + " RWI entries with references to several collections. All have been fixed (zombies still exists).");
}
private ObjectIndex openIndexFile(final File path, final String filenameStub, final ByteOrder indexOrder,
/**
 * enumerate all collection array files ('.kca') inside a path and count
 * how often each reference hash occurs over all stored collections
 * @param path directory that contains the collection array files
 * @param filenameStub common file name prefix of the array files
 * @param keylength length of the reference hash keys
 * @param indexOrder byte order of the keys
 * @param payloadrow definition of the payload rows inside the collections
 * @return an IntegerHandleIndex that maps each reference hash to its occurrence count
 * @throws IOException
 */
public static IntegerHandleIndex referenceHashes(
        final File path,
        final String filenameStub,
        final int keylength,
        final ByteOrder indexOrder,
        final Row payloadrow) throws IOException {
    final String[] list = path.list();
    IntegerHandleIndex references = new IntegerHandleIndex(keylength, indexOrder, 100000);
    if (list == null) return references; // path is not a directory: nothing to scan
    FixedWidthArray array;
    for (int i = 0; i < list.length; i++) if (list[i].endsWith(".kca")) {
        // open array; partition and serial number are encoded in the file name
        final int pos = list[i].indexOf('.');
        if (pos < 0) continue;
        // NOTE(review): the fixed substring offsets assume the file name layout
        // produced by arrayFile() - confirm against that method
        final int partitionNumber = Integer.parseInt(list[i].substring(pos + 9, pos + 11), 16);
        final int serialNumber = Integer.parseInt(list[i].substring(pos + 12, pos + 14), 16);
        try {
            array = openArrayFile(path, filenameStub, keylength, partitionNumber, serialNumber, indexOrder, payloadrow.objectsize, true);
        } catch (final IOException e) {
            // a broken array file must not abort the whole statistic; skip it
            e.printStackTrace();
            continue;
        }
        // loop over all elements in array and collect reference hashes
        Row.EntryIndex arrayrow;
        final Iterator<EntryIndex> ei = array.contentRows(-1);
        final long start = System.currentTimeMillis();
        long lastlog = start;
        int count = 0;
        while (ei.hasNext()) {
            arrayrow = ei.next();
            if (arrayrow == null) continue;
            final RowSet collection = new RowSet(payloadrow, arrayrow);
            final int chunkcountInArray = collection.size();
            for (int j = 0; j < chunkcountInArray; j++) {
                // column 0 of each chunk is the reference hash; count its occurrence
                references.inc(collection.get(j, false).getColBytes(0));
            }
            count++;
            // write a progress log at most every 30 seconds
            if (System.currentTimeMillis() - lastlog > 30000) {
                Log.logFine("COLLECTION INDEX STARTUP", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array");
                lastlog = System.currentTimeMillis();
            }
        }
        // was missing: the other array-scanning loops in this class close the
        // array after use; without this the file handle leaks per array file
        array.close();
    }
    return references;
}
private static ObjectIndex openIndexFile(
final File path, int keylength, final String filenameStub, final ByteOrder indexOrder,
final int loadfactor, final Row rowdef, final int initialSpace) throws IOException {
// open/create index table
final File f = new File(path, filenameStub + ".index");
@ -413,12 +474,14 @@ public class IndexCollection implements Index {
return theindex;
}
private FixedWidthArray openArrayFile(final int partitionNumber, final int serialNumber, final ByteOrder indexOrder, final boolean create) throws IOException {
final File f = arrayFile(path, filenameStub, loadfactor, payloadrow.objectsize, partitionNumber, serialNumber);
private static FixedWidthArray openArrayFile(
File path, String filenameStub, int keylength,
final int partitionNumber, final int serialNumber, final ByteOrder indexOrder, int objectsize, final boolean create) throws IOException {
final File f = arrayFile(path, filenameStub, loadfactor, objectsize, partitionNumber, serialNumber);
final int load = arrayCapacity(partitionNumber);
final Row rowdef = new Row(
"byte[] key-" + keylength + "," +
"byte[] collection-" + (RowCollection.exportOverheadSize + load * this.payloadrow.objectsize),
"byte[] collection-" + (RowCollection.exportOverheadSize + load * objectsize),
indexOrder,
0
);
@ -433,7 +496,7 @@ public class IndexCollection implements Index {
FixedWidthArray array = arrays.get(accessKey);
if (array != null) return array;
try {
array = openArrayFile(partitionNumber, serialNumber, indexOrder, true);
array = openArrayFile(this.path, this.filenameStub, this.keylength, partitionNumber, serialNumber, indexOrder, this.payloadrow.objectsize, true);
} catch (final IOException e) {
e.printStackTrace();
return null;
@ -442,14 +505,14 @@ public class IndexCollection implements Index {
return array;
}
private int arrayCapacity(final int arrayCounter) {
/**
 * Compute the chunk capacity of the array with the given counter:
 * loadfactor raised to the power (arrayCounter + 1), or 0 for a
 * negative counter.
 */
private static int arrayCapacity(final int arrayCounter) {
    if (arrayCounter < 0) {
        return 0;
    }
    int capacity = loadfactor;
    for (int exponent = arrayCounter; exponent > 0; exponent--) {
        capacity *= loadfactor;
    }
    return capacity;
}
private int arrayIndex(final int requestedCapacity) throws kelondroOutOfLimitsException{
private static int arrayIndex(final int requestedCapacity) throws kelondroOutOfLimitsException{
// the requestedCapacity is the number of wanted chunks
int load = 1, i = 0;
while (true) {

@ -280,6 +280,7 @@ public final class FileUtils {
while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c;
} finally {
if (fis != null) try { fis.close(); } catch (final Exception e) {}
fis = null;
}
return buffer;
}

@ -1915,7 +1915,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
boolean hasDoneSomething = false;
if (this.dhtDispatcher.cloudSize() > this.webIndex.seedDB.scheme.verticalPartitions() * 4) {
log.logInfo("dhtTransferJob: no selection, too many entries in transmission cloud: " + this.dhtDispatcher.cloudSize());
} else if (MemoryControl.available() < 1024*1024*20) {
} else if (MemoryControl.available() < 1024*1024*25) {
log.logInfo("dhtTransferJob: no selection, too less memory available : " + (MemoryControl.available() / 1024 / 1024) + " MB");
} else {
String startHash = PeerSelection.selectTransferStart();

Loading…
Cancel
Save