- re-activated concurrent iterator for EcoFiles

- added javadoc for new concurrent initialization in kelondroBytesLongMap
- switched default value for commons storage to false
- version step

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5361 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 2e2120046f
commit 45ad1c3dd5

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5 javacTarget=1.5
# Release Configuration # Release Configuration
releaseVersion=0.614 releaseVersion=0.615
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -265,7 +265,7 @@ indexSecondaryPath=
# are stored back into the index, and references with bad ranking are sorted out. Such sorted-out references can be stored # are stored back into the index, and references with bad ranking are sorted out. Such sorted-out references can be stored
# for later use (but there is no such use at this time). If the sorted-out references should be stored, the following property should be # for later use (but there is no such use at this time). If the sorted-out references should be stored, the following property should be
# set to true. If set to false, they are abandoned (deleted), and previously stored commons are removed. # set to true. If set to false, they are abandoned (deleted), and previously stored commons are removed.
index.storeCommons=true index.storeCommons=false
# the path to the LISTS files. Most lists are used to filter web content # the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS listsPath=DATA/LISTS

@ -132,6 +132,16 @@ public class kelondroBytesLongMap {
index = null; index = null;
} }
/**
* This method creates a concurrent thread that takes entries which are used to initialize the map.
* It should be used when a bytesLongMap is initialized while a file is being read. Concurrency of file IO and
* map creation will speed up the initialization process.
* @param keylength
* @param objectOrder
* @param space
* @param bufferSize
* @return
*/
public static initDataConsumer asynchronusInitializer(final int keylength, final kelondroByteOrder objectOrder, final int space, int bufferSize) { public static initDataConsumer asynchronusInitializer(final int keylength, final kelondroByteOrder objectOrder, final int space, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new kelondroBytesLongMap(keylength, objectOrder, space), bufferSize); initDataConsumer initializer = new initDataConsumer(new kelondroBytesLongMap(keylength, objectOrder, space), bufferSize);
ExecutorService service = Executors.newSingleThreadExecutor(); ExecutorService service = Executors.newSingleThreadExecutor();
@ -140,7 +150,7 @@ public class kelondroBytesLongMap {
return initializer; return initializer;
} }
public static class entry { private static class entry {
public byte[] key; public byte[] key;
public long l; public long l;
public entry(final byte[] key, final long l) { public entry(final byte[] key, final long l) {
@ -165,6 +175,11 @@ public class kelondroBytesLongMap {
this.result = result; this.result = result;
} }
/**
* hand over another entry that shall be inserted into the BytesLongMap with an addl method
* @param key
* @param l
*/
public void consume(final byte[] key, final long l) { public void consume(final byte[] key, final long l) {
try { try {
cache.put(new entry(key, l)); cache.put(new entry(key, l));
@ -173,6 +188,10 @@ public class kelondroBytesLongMap {
} }
} }
/**
* To signal the initialization thread that no more entries will be submitted with consume(),
* this method must be called. The process will not terminate if this method is never called.
*/
public void finish() { public void finish() {
try { try {
cache.put(poison); cache.put(poison);
@ -181,6 +200,14 @@ public class kelondroBytesLongMap {
} }
} }
/**
* This must be called after finish() was called. This method blocks until all entries
* have been processed and the content has been sorted. It returns the kelondroBytesLongMap
* that the user wanted to initialize.
* @return
* @throws InterruptedException
* @throws ExecutionException
*/
public kelondroBytesLongMap result() throws InterruptedException, ExecutionException { public kelondroBytesLongMap result() throws InterruptedException, ExecutionException {
return this.result.get(); return this.result.get();
} }
@ -188,9 +215,7 @@ public class kelondroBytesLongMap {
public kelondroBytesLongMap call() throws IOException { public kelondroBytesLongMap call() throws IOException {
try { try {
entry c; entry c;
while(true) { while ((c = cache.take()) != poison) {
c = cache.take();
if (c == poison) break;
map.addl(c.key, c.l); map.addl(c.key, c.l);
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {

@ -24,8 +24,6 @@
package de.anomic.kelondro; package de.anomic.kelondro;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -34,6 +32,9 @@ import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
public class kelondroChunkIterator implements Iterator<byte[]> { public class kelondroChunkIterator implements Iterator<byte[]> {
@ -48,7 +49,7 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
* @param chunksize: the size of the chunks that are returned by next(). remaining bytes until the length of recordsize are skipped * @param chunksize: the size of the chunks that are returned by next(). remaining bytes until the length of recordsize are skipped
* @throws FileNotFoundException * @throws FileNotFoundException
*/ */
/*
private final DataInputStream stream; private final DataInputStream stream;
private byte[] nextBytes; private byte[] nextBytes;
@ -98,7 +99,7 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
} }
/* */
ExecutorService service = Executors.newFixedThreadPool(2); ExecutorService service = Executors.newFixedThreadPool(2);
filechunkProducer producer; filechunkProducer producer;
filechunkSlicer slicer; filechunkSlicer slicer;
@ -133,9 +134,7 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
public byte[] next() { public byte[] next() {
if (nextRecord == null) return null; if (nextRecord == null) return null;
byte[] n = new byte[chunksize]; byte[] n = nextRecord;
System.arraycopy(nextRecord, 0, n, 0, chunksize);
slicer.recycle(nextRecord);
nextRecord = slicer.consume(); nextRecord = slicer.consume();
return n; return n;
} }
@ -143,13 +142,11 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
public void remove() { public void remove() {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
*/
private static class filechunkSlicer implements Callable<Integer> { private static class filechunkSlicer implements Callable<Integer> {
private filechunkProducer producer; private filechunkProducer producer;
private static byte[] poison = new byte[0]; private static byte[] poison = new byte[0];
private BlockingQueue<byte[]> empty;
private BlockingQueue<byte[]> slices; private BlockingQueue<byte[]> slices;
private int slicesize, head; private int slicesize, head;
@ -157,19 +154,8 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
assert producer != null; assert producer != null;
this.producer = producer; this.producer = producer;
this.slices = new ArrayBlockingQueue<byte[]>(stacksize); this.slices = new ArrayBlockingQueue<byte[]>(stacksize);
this.empty = new ArrayBlockingQueue<byte[]>(stacksize);
this.slicesize = slicesize; this.slicesize = slicesize;
this.head = head; this.head = head;
// fill the empty queue
for (int i = 0; i < stacksize; i++) empty.add(new byte[head]);
}
public void recycle(byte[] c) {
try {
empty.put(c);
} catch (InterruptedException e) {
e.printStackTrace();
}
} }
public byte[] consume() { public byte[] consume() {
@ -193,7 +179,7 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
filechunk c; filechunk c;
int p; int p;
try { try {
byte[] slice = empty.take(); byte[] slice = new byte[head];
int slicec = 0; int slicec = 0;
consumer: while(true) { consumer: while(true) {
c = producer.consume(); c = producer.consume();
@ -212,7 +198,7 @@ public class kelondroChunkIterator implements Iterator<byte[]> {
// the slice is now full // the slice is now full
p += slicesize - slicec; p += slicesize - slicec;
slices.put(slice); slices.put(slice);
slice = empty.take(); slice = new byte[head];
slicec = 0; slicec = 0;
continue slicefiller; continue slicefiller;
} else { } else {

Loading…
Cancel
Save