From 45ad1c3dd59b2c94c5b3b79539b4147aa910b937 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 23 Nov 2008 18:25:40 +0000 Subject: [PATCH] - re-activated concurrent iterator for EcoFiles - added javadoc for new concurrent intialization in kelondroBytesLongMap - switched default value for commons storage to false - version step git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5361 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- defaults/yacy.init | 2 +- .../anomic/kelondro/kelondroBytesLongMap.java | 33 ++++++++++++++++--- .../kelondro/kelondroChunkIterator.java | 30 +++++------------ 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/build.properties b/build.properties index 0d6099a18..502b08c99 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.5 javacTarget=1.5 # Release Configuration -releaseVersion=0.614 +releaseVersion=0.615 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz diff --git a/defaults/yacy.init b/defaults/yacy.init index 34e1bd88f..7531c5570 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -265,7 +265,7 @@ indexSecondaryPath= # are stored back into the index, and references with bad ranking are sorted out. Such sorted-out references can be stored # for later use (but there is no at this time). If the sorted-out references should be stored, the following property should be # set to true. If set to false, they are abandoned (deleted), and previously stored commons are removed. -index.storeCommons=true +index.storeCommons=false # the path to the LISTS files. 
Most lists are used to filter web content listsPath=DATA/LISTS diff --git a/source/de/anomic/kelondro/kelondroBytesLongMap.java b/source/de/anomic/kelondro/kelondroBytesLongMap.java index 51969fc90..2078e11d7 100644 --- a/source/de/anomic/kelondro/kelondroBytesLongMap.java +++ b/source/de/anomic/kelondro/kelondroBytesLongMap.java @@ -132,6 +132,16 @@ public class kelondroBytesLongMap { index = null; } + /** + * this method creates a concurrent thread that can take entries that are used to initialize the map + * it should be used when a bytesLongMap is initialized when a file is read. Concurrency of FileIO and + * map creation will speed up the initialization process. + * @param keylength + * @param objectOrder + * @param space + * @param bufferSize + * @return + */ public static initDataConsumer asynchronusInitializer(final int keylength, final kelondroByteOrder objectOrder, final int space, int bufferSize) { initDataConsumer initializer = new initDataConsumer(new kelondroBytesLongMap(keylength, objectOrder, space), bufferSize); ExecutorService service = Executors.newSingleThreadExecutor(); @@ -140,7 +150,7 @@ public class kelondroBytesLongMap { return initializer; } - public static class entry { + private static class entry { public byte[] key; public long l; public entry(final byte[] key, final long l) { @@ -165,6 +175,11 @@ public class kelondroBytesLongMap { this.result = result; } + /** + * hand over another entry that shall be inserted into the BytesLongMap with an addl method + * @param key + * @param l + */ public void consume(final byte[] key, final long l) { try { cache.put(new entry(key, l)); @@ -173,6 +188,10 @@ public class kelondroBytesLongMap { } } + /** + * to signal the initialization thread that no more entries will be submitted with consume() + * this method must be called. The process will not terminate if this is not called before. 
+ */ public void finish() { try { cache.put(poison); @@ -181,6 +200,14 @@ public class kelondroBytesLongMap { } } + /** + * this must be called after a finish() was called. this method blocks until all entries + * had been processed, and the content was sorted. It returns the kelondroBytesLongMap + * that the user wanted to initialize + * @return + * @throws InterruptedException + * @throws ExecutionException + */ public kelondroBytesLongMap result() throws InterruptedException, ExecutionException { return this.result.get(); } @@ -188,9 +215,7 @@ public class kelondroBytesLongMap { public kelondroBytesLongMap call() throws IOException { try { entry c; - while(true) { - c = cache.take(); - if (c == poison) break; + while ((c = cache.take()) != poison) { map.addl(c.key, c.l); } } catch (InterruptedException e) { diff --git a/source/de/anomic/kelondro/kelondroChunkIterator.java b/source/de/anomic/kelondro/kelondroChunkIterator.java index 8800923e3..8c5e8f31f 100644 --- a/source/de/anomic/kelondro/kelondroChunkIterator.java +++ b/source/de/anomic/kelondro/kelondroChunkIterator.java @@ -24,8 +24,6 @@ package de.anomic.kelondro; -import java.io.BufferedInputStream; -import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -34,6 +32,9 @@ import java.util.Iterator; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; public class kelondroChunkIterator implements Iterator { @@ -48,7 +49,7 @@ public class kelondroChunkIterator implements Iterator { * @param chunksize: the size of the chunks that are returned by next(). 
remaining bytes until the lenght of recordsize are skipped * @throws FileNotFoundException */ - + /* private final DataInputStream stream; private byte[] nextBytes; @@ -98,7 +99,7 @@ public class kelondroChunkIterator implements Iterator { } - /* + */ ExecutorService service = Executors.newFixedThreadPool(2); filechunkProducer producer; filechunkSlicer slicer; @@ -133,9 +134,7 @@ public class kelondroChunkIterator implements Iterator { public byte[] next() { if (nextRecord == null) return null; - byte[] n = new byte[chunksize]; - System.arraycopy(nextRecord, 0, n, 0, chunksize); - slicer.recycle(nextRecord); + byte[] n = nextRecord; nextRecord = slicer.consume(); return n; } @@ -143,13 +142,11 @@ public class kelondroChunkIterator implements Iterator { public void remove() { throw new UnsupportedOperationException(); } - */ private static class filechunkSlicer implements Callable { private filechunkProducer producer; private static byte[] poison = new byte[0]; - private BlockingQueue empty; private BlockingQueue slices; private int slicesize, head; @@ -157,21 +154,10 @@ public class kelondroChunkIterator implements Iterator { assert producer != null; this.producer = producer; this.slices = new ArrayBlockingQueue(stacksize); - this.empty = new ArrayBlockingQueue(stacksize); this.slicesize = slicesize; this.head = head; - // fill the empty queue - for (int i = 0; i < stacksize; i++) empty.add(new byte[head]); } - public void recycle(byte[] c) { - try { - empty.put(c); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - public byte[] consume() { try { byte[] b = slices.take(); // leer @@ -193,7 +179,7 @@ public class kelondroChunkIterator implements Iterator { filechunk c; int p; try { - byte[] slice = empty.take(); + byte[] slice = new byte[head]; int slicec = 0; consumer: while(true) { c = producer.consume(); @@ -212,7 +198,7 @@ public class kelondroChunkIterator implements Iterator { // the slice is now full p += slicesize - slicec; 
slices.put(slice); - slice = empty.take(); + slice = new byte[head]; slicec = 0; continue slicefiller; } else {