- enhanced performance of Eco-Tables: less time-consuming size() operations

- this will increase the speed of indexing and collection.index creation


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4675 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent e356625b22
commit 2c2dcd12a2

@@ -90,7 +90,7 @@ public class JakartaCommonsHttpClient extends de.anomic.http.HttpClient {
         // conManager.getParams().setDefaultMaxConnectionsPerHost(4); // default 2
         conManager.getParams().setMaxTotalConnections(50); // default 20
         conManager.getParams().setConnectionTimeout(60000); // set a default timeout
-        conManager.getParams().setDefaultMaxConnectionsPerHost(10); // prevent DoS by mistake
+        conManager.getParams().setDefaultMaxConnectionsPerHost(20); // prevent DoS by mistake
         // TODO should this be configurable?
         // accept self-signed or untrusted certificates
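For orientation, here is a minimal, self-contained sketch of the same pool tuning against the commons-httpclient 3.x API; the class name is illustrative (not YaCy code), and the parameter values are taken from the new side of the hunk:

    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;

    // Illustrative only: applies the connection-manager settings shown
    // above to a plain commons-httpclient 3.x client.
    public class PoolConfigSketch {
        public static void main(String[] args) {
            MultiThreadedHttpConnectionManager conManager = new MultiThreadedHttpConnectionManager();
            conManager.getParams().setMaxTotalConnections(50);          // pool-wide cap (default 20)
            conManager.getParams().setDefaultMaxConnectionsPerHost(20); // per-host cap, raised from 10
            conManager.getParams().setConnectionTimeout(60000);         // connect timeout in milliseconds
            HttpClient client = new HttpClient(conManager);             // client backed by the tuned pool
            System.out.println("client ready: " + client);
        }
    }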

@@ -32,8 +32,9 @@ import java.util.TreeMap;
 /**
  * The kelondroBufferedEcoFS extends the IO reduction to EcoFS by providing a
- * write buffer to elements that are inside the filed entries of the file
- * That means, each time, an entry is written to the end of the file, it is not buffered
+ * write buffer to elements that are INSIDE the filed entries of the file
+ * That means, each time, an entry is written to the end of the file, it is NOT buffered here,
+ * but possibly buffered in the enclosed kelondroEcoFS
  */
 public class kelondroBufferedEcoFS {
@@ -93,9 +94,10 @@ public class kelondroBufferedEcoFS {
     public synchronized void put(long index, byte[] b, int start) throws IOException {
         assert b.length - start >= efs.recordsize;
-        if (index > size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == efs.size()) {
-            efs.put(index, b, start);
+        long s = size();
+        if (index > s) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
+        if (index == s) {
+            efs.add(b, start);
         } else {
             byte[] bb = new byte[efs.recordsize];
             System.arraycopy(b, start, bb, 0, efs.recordsize);
@@ -105,13 +107,14 @@ public class kelondroBufferedEcoFS {
     }

     public synchronized void add(byte[] b, int start) throws IOException {
-        put(size(), b, start);
+        assert b.length - start >= efs.recordsize;
+        // index == size() == efs.size();
+        efs.add(b, start);
     }

     public synchronized void cleanLast(byte[] b, int start) throws IOException {
         assert b.length - start >= efs.recordsize;
-        Long i = new Long(size() - 1);
-        byte[] bb = buffer.remove(i);
+        byte[] bb = buffer.remove(new Long(size() - 1));
         if (bb == null) {
             efs.cleanLast(b, start);
         } else {
@@ -121,8 +124,7 @@ public class kelondroBufferedEcoFS {
     }

     public synchronized void cleanLast() throws IOException {
-        Long i = new Long(size() - 1);
-        buffer.remove(i);
+        buffer.remove(new Long(size() - 1));
         efs.cleanLast();
     }
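The common thread in the kelondroBufferedEcoFS hunks above is hoisting size() into a local: the accessor may have to consult the enclosed EcoFS, so calling it once per operation instead of two or three times is the "less time-consuming size() operations" named in the commit message. A tiny standalone illustration of the pattern (all names hypothetical):

    // Hypothetical stand-in for a table whose size() call is expensive.
    public class SizeHoistSketch {
        private long records = 3;

        // imagine this delegating to file metadata or a buffered backend
        long size() { return records; }

        void put(long index) {
            long s = size(); // evaluate once ...
            if (index > s) throw new IndexOutOfBoundsException("put(" + index + ") outside bounds (" + s + ")");
            if (index == s) records++; // ... reuse for the append test
        }

        public static void main(String[] args) {
            SizeHoistSketch t = new SizeHoistSketch();
            t.put(3); // appends; size() was consulted exactly once
            System.out.println("size is now " + t.size());
        }
    }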

@@ -92,7 +92,7 @@ public final class kelondroBufferedIOChunks extends kelondroAbstractIOChunks imp
         synchronized (this.buffer) {
             byte[] bb = (byte[]) buffer.get(new Long(pos));
             if (bb == null) {
-                // entry not known, read direktly from IO
+                // entry not known, read directly from IO
                 synchronized (this.ra) {
                     this.ra.seek(pos + off);
                     return ra.read(b, off, len);
@@ -100,7 +100,7 @@ public final class kelondroBufferedIOChunks extends kelondroAbstractIOChunks imp
             }
             // use buffered entry
             if (bb.length >= off + len) {
-                // the bufferd entry is long enough
+                // the buffered entry is long enough
                 System.arraycopy(bb, off, b, off, len);
                 return len;
             }

@@ -68,7 +68,8 @@ public class kelondroEcoFS {
    /**
     * stay below hard disc cache (is that necessary?)
     */
-    private static final int maxBuffer = 4 * 1024;
+    private static final int maxReadCache = 8 * 1024;
+    private static final int maxWriteBuffer = 4 * 1024;

    public kelondroEcoFS(File tablefile, int recordsize) throws IOException {
@@ -101,9 +102,8 @@ public class kelondroEcoFS {
        }

        // initialize cache and buffer
-        int maxrecords = Math.max(1, maxBuffer / recordsize);
-        cache = new byte[maxrecords * recordsize];
-        buffer = new byte[maxrecords * recordsize];
+        cache = new byte[Math.max(1, (int) (maxReadCache / recordsize)) * recordsize];
+        buffer = new byte[Math.max(1, (int) (maxWriteBuffer / recordsize)) * recordsize];
        this.buffercount = 0;

        // first-time read of cache
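The new sizing rounds each byte budget down to a whole number of records, but never below one record, so both arrays always hold complete records. A self-contained example of the arithmetic (the record size is an assumption for illustration):

    // Illustrates the record-aligned sizing used above; 300 is an arbitrary
    // example record size, not a YaCy value.
    public class BufferSizingSketch {
        public static void main(String[] args) {
            final int maxReadCache = 8 * 1024;   // byte budget for the read cache
            final int maxWriteBuffer = 4 * 1024; // byte budget for the write buffer
            final int recordsize = 300;          // assumed record size

            byte[] cache = new byte[Math.max(1, maxReadCache / recordsize) * recordsize];
            byte[] buffer = new byte[Math.max(1, maxWriteBuffer / recordsize) * recordsize];

            System.out.println("cache: " + cache.length + " bytes (27 records)");
            System.out.println("buffer: " + buffer.length + " bytes (13 records)");
        }
    }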
@@ -265,7 +265,8 @@ public class kelondroEcoFS {
    public synchronized void put(long index, byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
-        if (index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
+        long s = size();
+        if (index > s) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");

        // check if this is an empty entry
        if (isClean(b , start, this.recordsize)) {
@@ -288,12 +289,11 @@ public class kelondroEcoFS {
            System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize);
            return;
        }
-        if (index == size()) {
+        if (index == s) {
            // append the record to the end of the file;

            // look if there is space in the buffer
-            int bufferpos = (int) (index - filesize());
-            if (bufferpos >= this.buffer.length / this.recordsize) {
+            if (this.buffercount >= this.buffer.length / this.recordsize) {
                assert this.buffercount == this.buffer.length / this.recordsize;
                // the record does not fit in current buffer
                // write buffer
@@ -302,7 +302,7 @@ public class kelondroEcoFS {
                System.arraycopy(b, start, this.buffer, 0, this.recordsize);
                this.buffercount = 1;
            } else {
-                System.arraycopy(b, start, this.buffer, bufferpos * this.recordsize, this.recordsize);
+                System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
                this.buffercount++;
            }
            assert this.buffercount <= this.buffer.length / this.recordsize;
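With this change the append slot comes from this.buffercount alone; the old code derived it as index - filesize(), which costs another file-size lookup. A standalone sketch of the append-into-buffer step (names assumed, the disk write stubbed out):

    // Hypothetical reduction of the buffered append used by put()/add():
    // records fill a fixed byte[] and spill to disk when it is full.
    public class AppendBufferSketch {
        static final int recordsize = 4;
        static final byte[] buffer = new byte[3 * recordsize]; // room for 3 records
        static int buffercount = 0;

        static void flushBuffer() { buffercount = 0; } // stand-in for the disk write

        static void appendRecord(byte[] b, int start) {
            if (buffercount >= buffer.length / recordsize) {
                flushBuffer();                                    // spill the full buffer
                System.arraycopy(b, start, buffer, 0, recordsize);
                buffercount = 1;
            } else {
                System.arraycopy(b, start, buffer, buffercount * recordsize, recordsize);
                buffercount++;
            }
        }

        public static void main(String[] args) {
            for (int i = 0; i < 5; i++) appendRecord(new byte[]{1, 2, 3, 4}, 0);
            System.out.println("records still buffered: " + buffercount); // 2 after one flush
        }
    }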
@@ -315,7 +315,32 @@ public class kelondroEcoFS {
    }

    public synchronized void add(byte[] b, int start) throws IOException {
-        put(size(), b, start);
+        // index == size() == filesize() + (long) this.buffercount
+        assert b.length - start >= this.recordsize;
+
+        // check if this is an empty entry
+        if (isClean(b , start, this.recordsize)) {
+            // it is not possible to add a clean record at the end of a EcoFS, because
+            // such records should cause the record to shrink
+            throw new IOException("add: record at end is clean");
+        }
+
+        // append the record to the end of the file;
+        // look if there is space in the buffer
+        if (this.buffercount >= this.buffer.length / this.recordsize) {
+            assert this.buffercount == this.buffer.length / this.recordsize;
+            // the record does not fit in current buffer
+            // write buffer
+            flushBuffer();
+            // write new entry to buffer
+            System.arraycopy(b, start, this.buffer, 0, this.recordsize);
+            this.buffercount = 1;
+        } else {
+            System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
+            this.buffercount++;
+        }
+        assert this.buffercount <= this.buffer.length / this.recordsize;
    }

    private boolean isClean(byte[] b, int offset, int length) {
@@ -366,8 +391,9 @@ public class kelondroEcoFS {
     */
    public synchronized void clean(long index, byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
-        if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == size() - 1) {
+        long s = size();
+        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + s + ")");
+        if (index == s - 1) {
            cleanLast(b, start);
            return;
        }
@@ -407,8 +433,9 @@ public class kelondroEcoFS {
     * @throws IOException
     */
    public synchronized void clean(long index) throws IOException {
-        if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == size() - 1) {
+        long s = size();
+        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + s + ")");
+        if (index == s - 1) {
            cleanLast();
            return;
        }
@@ -461,8 +488,9 @@ public class kelondroEcoFS {
    private synchronized void cleanLast0(byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
        // check if index is inside of cache
-        int p = inCache(this.size() - 1);
-        int q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
+        long s = this.size();
+        int p = inCache(s - 1);
+        int q = (p >= 0) ? -1 : inBuffer(s - 1);
        if ((p < 0) && (q < 0)) {
            // the index is outside of cache and buffer index. shift cache window
            fillCache(this.size() - 1);
@@ -474,7 +502,7 @@ public class kelondroEcoFS {
            System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
            // shrink cache and file
            assert this.buffercount == 0;
-            this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+            this.raf.setLength((long) (s - 1) * (long) this.recordsize);
            this.cachecount--;
            return;
        }
@@ -506,12 +534,13 @@ public class kelondroEcoFS {
    private synchronized void cleanLast0() throws IOException {
        // check if index is inside of cache
-        long p = inCache(this.size() - 1);
-        long q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
+        long s = this.size();
+        long p = inCache(s - 1);
+        long q = (p >= 0) ? -1 : inBuffer(s - 1);
        if (p >= 0) {
            // shrink cache and file
            assert this.buffercount == 0;
-            this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+            this.raf.setLength((long) (s - 1) * (long) this.recordsize);
            this.cachecount--;
            return;
        }
@@ -523,7 +552,7 @@ public class kelondroEcoFS {
        }
        // check if file should shrink
        assert this.buffercount == 0;
-        this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+        this.raf.setLength((long) (s - 1) * (long) this.recordsize);
    }

    public static class ChunkIterator implements Iterator<byte[]> {
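Each shrink truncates the file by exactly one record; the explicit casts widen the multiply to long before it happens, so tables larger than 2 GiB cannot overflow int arithmetic, and reusing the cached s saves the size() calls the old lines repeated. A standalone sketch of the step (file name and record size are assumptions):

    import java.io.IOException;
    import java.io.RandomAccessFile;

    // Illustrative only: drop the last fixed-size record from a table file.
    public class ShrinkSketch {
        public static void main(String[] args) throws IOException {
            final int recordsize = 300; // assumed record size
            try (RandomAccessFile raf = new RandomAccessFile("demo.table", "rw")) {
                long s = raf.length() / recordsize;                    // records currently on disk
                if (s > 0) raf.setLength((s - 1) * (long) recordsize); // widen before multiplying
            }
        }
    }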

@@ -196,7 +196,7 @@ public class kelondroEcoTable implements kelondroIndex {
                }
                try {
-                    assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
+                    assert file.size() == index.size() + doubles.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
                } catch (IOException e) {
                    e.printStackTrace();
                }
@@ -269,7 +269,7 @@ public class kelondroEcoTable implements kelondroIndex {
            assert table.size() == i;
            table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
        }
-        file.put(i, row.bytes(), 0);
+        file.add(row.bytes(), 0);
        assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
        return true;
    }
@@ -327,7 +327,7 @@ public class kelondroEcoTable implements kelondroIndex {
    }

    public synchronized Entry get(byte[] key) throws IOException {
-        assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
+        assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail;
        assert ((table == null) || (table.size() == index.size()));
        int i = index.geti(key);
        if (i == -1) return null;

@@ -295,7 +295,7 @@ public class plasmaCrawlQueues {
        if (seed == null) return false;
        // we know a peer which should provide remote crawl entries. load them now.
-        rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10);
+        rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20);
        if (reader == null) return true;
        // parse the rss
        rssReader.Item item;

@@ -67,8 +67,8 @@ public final class plasmaWordIndex implements indexRI {
    // environment constants
    public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
-    public static final int wCacheMaxChunk = 500; // maximum number of references for each urlhash
-    public static final int lowcachedivisor = 1000;
+    public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
+    public static final int lowcachedivisor = 1200;
    public static final int maxCollectionPartition = 7; // should be 7

    private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder;
@@ -208,7 +208,7 @@ public final class plasmaWordIndex implements indexRI {
    public void dhtFlushControl(indexRAMRI theCache) {
        // check for forced flush
        int l = 0;
-        // flush elements that are too big. This flushinfg depends on the fact that the flush rule
+        // flush elements that are too big. This flushing depends on the fact that the flush rule
        // selects the biggest elements first for flushing. If it does not for any reason, the following
        // loop would not terminate. To ensure termination an additional counter is used
        while ((l++ < 100) && (theCache.maxURLinCache() > wCacheMaxChunk)) {
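A self-contained illustration of that termination guard (the deque is a hypothetical stand-in for the indexRAMRI cache): flushing normally removes the biggest entry first, but the hard cap of 100 iterations ends the loop even if that assumption ever breaks.

    import java.util.ArrayDeque;
    import java.util.Deque;

    // Hypothetical model: each number is the reference count of one cached
    // container; wCacheMaxChunk is the threshold from the constants above.
    public class BoundedFlushSketch {
        static final int wCacheMaxChunk = 800;

        public static void main(String[] args) {
            Deque<Integer> cache = new ArrayDeque<>();
            for (int i = 0; i < 5; i++) cache.push(1000 + i); // oversized entries
            int l = 0;
            // bounded loop: at most 100 flushes, and only while the biggest
            // cached entry is still over the chunk limit
            while (l++ < 100 && !cache.isEmpty() && cache.peek() > wCacheMaxChunk) {
                cache.pop(); // stand-in for flushing one container to disk
            }
            System.out.println("flushed " + (5 - cache.size()) + " containers in " + (l - 1) + " passes");
        }
    }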

@@ -480,7 +480,7 @@ public final class serverCore extends serverAbstractBusyThread implements server
        Thread.interrupted();
        // shut down all busySessions
-        for (Session session: this.busySessions) {
+        if (this.busySessions != null) for (Session session: this.busySessions) {
            try {session.interrupt();} catch (SecurityException e ) {e.printStackTrace();}
        }
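A minimal sketch of the guard (Thread stands in for the Session type, which is not reproduced here): shutting down before the session set was ever created is now a no-op instead of a NullPointerException.

    import java.util.List;

    // Illustrative only: interrupt every busy session, tolerating a
    // collection that was never initialized.
    public class GuardedShutdownSketch {
        static void interruptAll(List<Thread> busySessions) {
            if (busySessions != null) for (Thread session : busySessions) {
                try { session.interrupt(); } catch (SecurityException e) { e.printStackTrace(); }
            }
        }

        public static void main(String[] args) {
            interruptAll(null); // early shutdown: nothing to do, no crash
            interruptAll(List.of(new Thread()));
        }
    }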
