From 2c2dcd12a2b2ac611b0c151ab54aeee77742c303 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 10 Apr 2008 13:24:55 +0000
Subject: [PATCH] - enhanced performance of Eco-Tables: less time-consuming
 size() operations

- will increase speed of indexing and collection.index creation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4675 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../anomic/http/JakartaCommonsHttpClient.java |  2 +-
 .../kelondro/kelondroBufferedEcoFS.java       | 22 +++---
 .../kelondro/kelondroBufferedIOChunks.java    |  4 +-
 source/de/anomic/kelondro/kelondroEcoFS.java  | 71 +++++++++++++------
 .../de/anomic/kelondro/kelondroEcoTable.java  |  6 +-
 .../plasma/crawler/plasmaCrawlQueues.java     |  2 +-
 source/de/anomic/plasma/plasmaWordIndex.java  |  6 +-
 source/de/anomic/server/serverCore.java       |  2 +-
 8 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/source/de/anomic/http/JakartaCommonsHttpClient.java b/source/de/anomic/http/JakartaCommonsHttpClient.java
index 673168f61..ae3901593 100644
--- a/source/de/anomic/http/JakartaCommonsHttpClient.java
+++ b/source/de/anomic/http/JakartaCommonsHttpClient.java
@@ -90,7 +90,7 @@ public class JakartaCommonsHttpClient extends de.anomic.http.HttpClient {
         // conManager.getParams().setDefaultMaxConnectionsPerHost(4); // default 2
         conManager.getParams().setMaxTotalConnections(50); // default 20
         conManager.getParams().setConnectionTimeout(60000); // set a default timeout
-        conManager.getParams().setDefaultMaxConnectionsPerHost(10); // prevent DoS by mistake
+        conManager.getParams().setDefaultMaxConnectionsPerHost(20); // prevent DoS by mistake
         // TODO should this be configurable?
 
         // accept self-signed or untrusted certificates
diff --git a/source/de/anomic/kelondro/kelondroBufferedEcoFS.java b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java
index 7b3dd7711..2fdc17422 100644
--- a/source/de/anomic/kelondro/kelondroBufferedEcoFS.java
+++ b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java
@@ -32,8 +32,9 @@ import java.util.TreeMap;
 
 /**
  * The kelondroBufferedEcoFS extends the IO reduction to EcoFS by providing a
- * write buffer to elements that are inside the filed entries of the file
- * That means, each time, an entry is written to the end of the file, it is not buffered
+ * write buffer to elements that are INSIDE the filed entries of the file.
+ * That means, each time an entry is written to the end of the file, it is NOT buffered here,
+ * but possibly buffered in the enclosed kelondroEcoFS
  */
 public class kelondroBufferedEcoFS {
 
@@ -93,9 +94,10 @@ public class kelondroBufferedEcoFS {
 
     public synchronized void put(long index, byte[] b, int start) throws IOException {
         assert b.length - start >= efs.recordsize;
-        if (index > size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == efs.size()) {
-            efs.put(index, b, start);
+        long s = size();
+        if (index > s) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
+        if (index == s) {
+            efs.add(b, start);
         } else {
             byte[] bb = new byte[efs.recordsize];
             System.arraycopy(b, start, bb, 0, efs.recordsize);
@@ -105,13 +107,14 @@
     }
 
     public synchronized void add(byte[] b, int start) throws IOException {
-        put(size(), b, start);
+        assert b.length - start >= efs.recordsize;
+        // index == size() == efs.size();
+        efs.add(b, start);
     }
 
     public synchronized void cleanLast(byte[] b, int start) throws IOException {
         assert b.length - start >= efs.recordsize;
-        Long i = new Long(size() - 1);
-        byte[] bb = buffer.remove(i);
+        byte[] bb = buffer.remove(new Long(size() - 1));
         if (bb == null) {
             efs.cleanLast(b, start);
         } else {
@@ -121,8 +124,7 @@
     }
 
     public synchronized void cleanLast() throws IOException {
-        Long i = new Long(size() - 1);
-        buffer.remove(i);
+        buffer.remove(new Long(size() - 1));
         efs.cleanLast();
     }
 
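Note on the size() pattern above: size() on the buffered file is comparatively
expensive because it has to combine the file length with the buffer fill state.
The patch therefore calls it once per method, keeps the result in a local s, and
routes appends through a dedicated add() that needs no index lookup at all. A
minimal sketch of the idea with hypothetical names, not the kelondro classes
themselves:

    import java.util.ArrayList;
    import java.util.List;

    class RecordStore {
        private final List<byte[]> records = new ArrayList<byte[]>();

        // potentially costly for a file-backed store (file length plus buffered count)
        long size() { return records.size(); }

        // before: appends went through put(size(), b), paying one size() call each;
        // a dedicated add() appends without consulting the current size at all
        void add(byte[] b) { records.add(b); }

        void put(long index, byte[] b) {
            long s = size(); // hoisted: call once, reuse for every comparison
            if (index > s) throw new IndexOutOfBoundsException("put(" + index + ") outside bounds (" + s + ")");
            if (index == s) add(b); else records.set((int) index, b);
        }
    }
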
diff --git a/source/de/anomic/kelondro/kelondroBufferedIOChunks.java b/source/de/anomic/kelondro/kelondroBufferedIOChunks.java
index 58cca00b5..2bbc44b5e 100644
--- a/source/de/anomic/kelondro/kelondroBufferedIOChunks.java
+++ b/source/de/anomic/kelondro/kelondroBufferedIOChunks.java
@@ -92,7 +92,7 @@ public final class kelondroBufferedIOChunks extends kelondroAbstractIOChunks imp
         synchronized (this.buffer) {
             byte[] bb = (byte[]) buffer.get(new Long(pos));
             if (bb == null) {
-                // entry not known, read direktly from IO
+                // entry not known, read directly from IO
                 synchronized (this.ra) {
                     this.ra.seek(pos + off);
                     return ra.read(b, off, len);
@@ -100,7 +100,7 @@ public final class kelondroBufferedIOChunks extends kelondroAbstractIOChunks imp
             }
             // use buffered entry
             if (bb.length >= off + len) {
-                // the bufferd entry is long enough
+                // the buffered entry is long enough
                 System.arraycopy(bb, off, b, off, len);
                 return len;
             }
diff --git a/source/de/anomic/kelondro/kelondroEcoFS.java b/source/de/anomic/kelondro/kelondroEcoFS.java
index 161128a24..532213566 100644
--- a/source/de/anomic/kelondro/kelondroEcoFS.java
+++ b/source/de/anomic/kelondro/kelondroEcoFS.java
@@ -68,7 +68,8 @@ public class kelondroEcoFS {
 
     /**
      * stay below hard disc cache (is that necessary?)
      */
-    private static final int maxBuffer = 4 * 1024;
+    private static final int maxReadCache = 8 * 1024;
+    private static final int maxWriteBuffer = 4 * 1024;
 
     public kelondroEcoFS(File tablefile, int recordsize) throws IOException {
@@ -101,9 +102,8 @@
         }
 
         // initialize cache and buffer
-        int maxrecords = Math.max(1, maxBuffer / recordsize);
-        cache = new byte[maxrecords * recordsize];
-        buffer = new byte[maxrecords * recordsize];
+        cache = new byte[Math.max(1, (int) (maxReadCache / recordsize)) * recordsize];
+        buffer = new byte[Math.max(1, (int) (maxWriteBuffer / recordsize)) * recordsize];
         this.buffercount = 0;
 
         // first-time read of cache
@@ -265,7 +265,8 @@
 
     public synchronized void put(long index, byte[] b, int start) throws IOException {
         assert b.length - start >= this.recordsize;
-        if (index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
+        long s = size();
+        if (index > s) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
 
         // check if this is an empty entry
         if (isClean(b , start, this.recordsize)) {
@@ -288,12 +289,11 @@
             System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize);
             return;
         }
-        if (index == size()) {
+        if (index == s) {
             // append the record to the end of the file;
             // look if there is space in the buffer
 
-            int bufferpos = (int) (index - filesize());
-            if (bufferpos >= this.buffer.length / this.recordsize) {
+            if (this.buffercount >= this.buffer.length / this.recordsize) {
                 assert this.buffercount == this.buffer.length / this.recordsize;
                 // the record does not fit in current buffer
                 // write buffer
@@ -302,7 +302,7 @@
                 System.arraycopy(b, start, this.buffer, 0, this.recordsize);
                 this.buffercount = 1;
             } else {
-                System.arraycopy(b, start, this.buffer, bufferpos * this.recordsize, this.recordsize);
+                System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
                 this.buffercount++;
             }
             assert this.buffercount <= this.buffer.length / this.recordsize;
@@ -315,7 +315,32 @@
     }
 
     public synchronized void add(byte[] b, int start) throws IOException {
-        put(size(), b, start);
+        // index == size() == filesize() + (long) this.buffercount
+
+        assert b.length - start >= this.recordsize;
+
+        // check if this is an empty entry
+        if (isClean(b , start, this.recordsize)) {
+            // it is not possible to add a clean record at the end of an EcoFS, because
+            // such records should cause the file to shrink
+            throw new IOException("add: record at end is clean");
+        }
+
+        // append the record to the end of the file;
+        // look if there is space in the buffer
+        if (this.buffercount >= this.buffer.length / this.recordsize) {
+            assert this.buffercount == this.buffer.length / this.recordsize;
+            // the record does not fit in current buffer
+            // write buffer
+            flushBuffer();
+            // write new entry to buffer
+            System.arraycopy(b, start, this.buffer, 0, this.recordsize);
+            this.buffercount = 1;
+        } else {
+            System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
+            this.buffercount++;
+        }
+        assert this.buffercount <= this.buffer.length / this.recordsize;
     }
 
     private boolean isClean(byte[] b, int offset, int length) {
@@ -366,8 +391,9 @@
      */
     public synchronized void clean(long index, byte[] b, int start) throws IOException {
         assert b.length - start >= this.recordsize;
-        if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == size() - 1) {
+        long s = size();
+        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + s + ")");
+        if (index == s - 1) {
             cleanLast(b, start);
             return;
         }
@@ -407,8 +433,9 @@
      * @throws IOException
      */
     public synchronized void clean(long index) throws IOException {
-        if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
-        if (index == size() - 1) {
+        long s = size();
+        if (index >= s) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + s + ")");
+        if (index == s - 1) {
             cleanLast();
             return;
         }
@@ -461,8 +488,9 @@
     private synchronized void cleanLast0(byte[] b, int start) throws IOException {
         assert b.length - start >= this.recordsize;
         // check if index is inside of cache
-        int p = inCache(this.size() - 1);
-        int q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
+        long s = this.size();
+        int p = inCache(s - 1);
+        int q = (p >= 0) ? -1 : inBuffer(s - 1);
         if ((p < 0) && (q < 0)) {
             // the index is outside of cache and buffer index. shift cache window
             fillCache(this.size() - 1);
@@ -474,7 +502,7 @@
             System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
             // shrink cache and file
             assert this.buffercount == 0;
-            this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+            this.raf.setLength((long) (s - 1) * (long) this.recordsize);
             this.cachecount--;
             return;
         }
@@ -506,12 +534,13 @@
 
     private synchronized void cleanLast0() throws IOException {
         // check if index is inside of cache
-        long p = inCache(this.size() - 1);
-        long q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
+        long s = this.size();
+        long p = inCache(s - 1);
+        long q = (p >= 0) ? -1 : inBuffer(s - 1);
         if (p >= 0) {
             // shrink cache and file
             assert this.buffercount == 0;
-            this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+            this.raf.setLength((long) (s - 1) * (long) this.recordsize);
             this.cachecount--;
             return;
         }
@@ -523,7 +552,7 @@
         }
         // check if file should shrink
         assert this.buffercount == 0;
-        this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
+        this.raf.setLength((long) (s - 1) * (long) this.recordsize);
     }
 
     public static class ChunkIterator implements Iterator {
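Note on the buffer sizing above: the former single maxBuffer constant is split so
that the read cache (8 KB) and the write buffer (4 KB) can be tuned independently;
both arrays are rounded down to a whole number of records. A sketch of that sizing
arithmetic, not the kelondro code itself:

    final class RecordBuffers {
        // allocate a buffer that holds only whole records
        static byte[] allocate(int maxBytes, int recordsize) {
            int records = Math.max(1, maxBytes / recordsize); // room for at least one record
            return new byte[records * recordsize];
        }
        // example with recordsize = 300: an 8 KB read cache holds 27 records (8100 bytes),
        // a 4 KB write buffer holds 13 records (3900 bytes)
    }
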
diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java
index f46f9611d..85f77c299 100644
--- a/source/de/anomic/kelondro/kelondroEcoTable.java
+++ b/source/de/anomic/kelondro/kelondroEcoTable.java
@@ -196,7 +196,7 @@ public class kelondroEcoTable implements kelondroIndex {
             }
 
             try {
-                assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
+                assert file.size() == index.size() + doubles.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
             } catch (IOException e) {
                 e.printStackTrace();
             }
@@ -269,7 +269,7 @@ public class kelondroEcoTable implements kelondroIndex {
             assert table.size() == i;
             table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
         }
-        file.put(i, row.bytes(), 0);
+        file.add(row.bytes(), 0);
         assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
         return true;
     }
@@ -327,7 +327,7 @@ public class kelondroEcoTable implements kelondroIndex {
 
     public synchronized Entry get(byte[] key) throws IOException {
-        assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
+        assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail;
         assert ((table == null) || (table.size() == index.size()));
         int i = index.geti(key);
         if (i == -1) return null;
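Note on the corrected startup assert above: while an Eco-Table file is scanned at
startup, every record must be accounted for exactly once, as an indexed key, as a
member of a double-occurring-key group, or as a failed read; the original
assertion omitted the doubles term. As a sketch, with hypothetical counters
standing in for file.size(), index.size(), doubles.size() and fail:

    final class TableCheck {
        // startup invariant of an Eco-Table file scan (sketch, not the kelondro code):
        // every file record is indexed, part of a double-key group, or a failed read
        static boolean consistent(long fileRecords, long indexedKeys, long doubleRecords, long failed) {
            return fileRecords == indexedKeys + doubleRecords + failed;
        }
    }
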
diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
index 7583da12a..a26e6a01a 100644
--- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
+++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
@@ -295,7 +295,7 @@ public class plasmaCrawlQueues {
         if (seed == null) return false;
 
         // we know a peer which should provide remote crawl entries. load them now.
-        rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10);
+        rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20);
         if (reader == null) return true;
         // parse the rss
         rssReader.Item item;
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 035500414..31b248b3f 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -67,8 +67,8 @@ public final class plasmaWordIndex implements indexRI {
 
     // environment constants
     public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
-    public static final int wCacheMaxChunk = 500; // maximum number of references for each urlhash
-    public static final int lowcachedivisor = 1000;
+    public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
+    public static final int lowcachedivisor = 1200;
     public static final int maxCollectionPartition = 7; // should be 7
 
     private final kelondroByteOrder indexOrder = kelondroBase64Order.enhancedCoder;
@@ -208,7 +208,7 @@
     public void dhtFlushControl(indexRAMRI theCache) {
         // check for forced flush
         int l = 0;
-        // flush elements that are too big. This flushinfg depends on the fact that the flush rule
+        // flush elements that are too big. This flushing depends on the fact that the flush rule
         // selects the biggest elements first for flushing. If it does not for any reason, the following
         // loop would not terminate. To ensure termination an additional counter is used
         while ((l++ < 100) && (theCache.maxURLinCache() > wCacheMaxChunk)) {
diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java
index 4ecee876b..13b95926b 100644
--- a/source/de/anomic/server/serverCore.java
+++ b/source/de/anomic/server/serverCore.java
@@ -480,7 +480,7 @@ public final class serverCore extends serverAbstractBusyThread implements server
 
         Thread.interrupted();
 
         // shut down all busySessions
-        for (Session session: this.busySessions) {
+        if (this.busySessions != null) for (Session session: this.busySessions) {
             try {session.interrupt();} catch (SecurityException e ) {e.printStackTrace();}
         }
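Note on dhtFlushControl above: the loop depends on the cache flushing its biggest
entries first; if that rule were ever violated, the loop condition alone would
never become false, so the l++ < 100 counter caps the iterations. The shape of
that defensive loop, sketched against a hypothetical cache interface:

    interface Cache {
        int maxURLinCache();   // size of the biggest entry currently in the cache
        void flushBiggest();   // expected to shrink maxURLinCache() over time
    }

    final class FlushControl {
        static void flushControl(Cache cache, int wCacheMaxChunk) {
            int l = 0;
            // the counter guarantees termination even if flushBiggest()
            // fails to select the biggest entry for any reason
            while ((l++ < 100) && (cache.maxURLinCache() > wCacheMaxChunk)) {
                cache.flushBiggest();
            }
        }
    }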