Modified the access method that gets index entries out of an array of BLOBs:

iterate over them and merge each entry as it is read, instead of collecting all of them first and merging afterwards.
This should use less memory and may behave better in an environment with many queries.
To ensure that too many queries will not cause total blocking,
a time-out of one second was also added. After the time-out
the index data that was collected so far is returned.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6013 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 55ff919b5d
commit c38c852090

@ -480,7 +480,8 @@ public class BLOBArray implements BLOB {
* @return
* @throws IOException
*/
public synchronized List<byte[]> getAll(byte[] key) throws IOException {
public synchronized Iterable<byte[]> getAll(byte[] key) throws IOException {
/*
byte[] b;
ArrayList<byte[]> l = new ArrayList<byte[]>(blobs.size());
for (blobItem bi: blobs) {
@ -488,6 +489,56 @@ public class BLOBArray implements BLOB {
if (b != null) l.add(b);
}
return l;
*/
return new BlobValues(key);
}
/**
 * Lazy view on all values that are stored under one key in the BLOBs of the enclosing array.
 * The object is its own iterator: {@link #iterator()} returns <code>this</code>, so it can be
 * traversed only once.
 *
 * Values are fetched one step ahead: the constructor and every call of {@link #next()} look up
 * the key in the following BLOBs until one of them returns a non-null value.
 * If a BLOB throws an IOException during that look-up, the stack trace is printed and the
 * iteration ends at that point; BLOBs that were not visited yet are not queried.
 *
 * NOTE(review): the iterator over <code>blobs</code> is used after the constructor returns;
 * whether callers must guard against concurrent changes of <code>blobs</code> cannot be seen
 * from this class alone - confirm against the enclosing class.
 */
public class BlobValues implements Iterator<byte[]>, Iterable<byte[]> {

    /** iterator over the BLOB items of the enclosing array */
    private final Iterator<blobItem> bii;

    /** the key that is looked up in every BLOB */
    private final byte[] key;

    /** the value that the following call of next() will return; null if there is none */
    private byte[] next;

    /**
     * Start an iteration over all values for the given key.
     * The first value is looked up immediately.
     * @param key the key to look up in every BLOB
     */
    public BlobValues(byte[] key) {
        this.bii = blobs.iterator();
        this.key = key;
        this.next = null;
        next0();
    }

    /**
     * Advance to the following BLOB that holds a value for the key and store that value
     * in <code>next</code>. <code>next</code> is set to null if no such BLOB is left or if
     * a BLOB could not be read.
     */
    private void next0() {
        while (this.bii.hasNext()) {
            BLOB b = this.bii.next().blob;
            try {
                this.next = b.get(key);
                if (this.next != null) return;
            } catch (IOException e) {
                // a read failure ends the iteration; the values found so far stay valid
                e.printStackTrace();
                this.next = null;
                return;
            }
        }
        this.next = null;
    }

    /**
     * @return this object; the iteration cannot be restarted
     */
    public Iterator<byte[]> iterator() {
        return this;
    }

    /**
     * @return true if another value for the key is available
     */
    public boolean hasNext() {
        return this.next != null;
    }

    /**
     * @return the following value for the key
     * @throws java.util.NoSuchElementException if no value is left
     */
    public byte[] next() {
        // the Iterator contract demands an exception here, not a null result
        if (this.next == null) throw new java.util.NoSuchElementException("no more entries in BlobValues");
        byte[] n = this.next;
        next0();
        return n;
    }

    /**
     * Removal is not supported.
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("no remove in BlobValues");
    }
}
/**

@ -28,7 +28,6 @@ import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBArray;
@ -197,12 +196,23 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
* @throws IOException
*/
public synchronized ReferenceContainer<ReferenceType> get(final byte[] termHash) throws IOException {
// NOTE(review): this span looks like a diff that was rendered without +/- markers, so two variants
// of the body are interleaved: one that handles the result of getAll as a List (size()/remove(0))
// and one that walks it with an Iterator. The comments describe the lines as written - confirm
// against the repository which lines are live.
List<byte[]> entries = this.array.getAll(termHash);
if (entries == null || entries.size() == 0) return null;
byte[] a = entries.remove(0);
// deadline for the retrieval: 1000 milliseconds from now
long timeout = System.currentTimeMillis() + 1000;
Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
// no entry for this term hash: the result is null
if (entries == null || !entries.hasNext()) return null;
byte[] a = entries.next();
// k counts the entries taken from the iterator; it is only used in the warning text
int k = 1;
// the first entry is the start container, every following entry is merged into it
ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(a, payloadrow));
while (entries.size() > 0) {
c = c.merge(new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(entries.remove(0), payloadrow)));
// deadline check directly after the first entry was imported
if (System.currentTimeMillis() > timeout) {
Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
return c;
}
while (entries.hasNext()) {
c = c.merge(new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(entries.next(), payloadrow)));
k++;
// deadline passed: log a warning and return what was merged so far
if (System.currentTimeMillis() > timeout) {
Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
return c;
}
}
return c;
}

@ -42,6 +42,7 @@ import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow;
import de.anomic.kelondro.text.navigationPrototype.NavigationReference;
import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceFactory;
import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceRow;
import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.text.referencePrototype.WordReferenceFactory;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
@ -69,7 +70,7 @@ public final class Segment {
private final Log log;
private final IndexCell<WordReference> termIndex;
//private final IndexCell<NavigationReference> authorNavIndex;
private final IndexCell<NavigationReference> authorNavIndex;
private final MetadataRepository urlMetadata;
private final File segmentPath;
private final IODispatcher merger;
@ -97,9 +98,9 @@ public final class Segment {
maxFileSize,
this.merger,
writeBufferSize);
/*
this.authorNavIndex = new IndexCell<NavigationReference>(
new File(segmentPath, "AUTHORNAV"),
new File(new File(segmentPath, "nav_author"), "idx"),
navigationReferenceFactory,
wordOrder,
NavigationReferenceRow.navEntryRow,
@ -108,7 +109,7 @@ public final class Segment {
maxFileSize,
this.merger,
writeBufferSize);
*/
File metadatadir = new File(segmentPath, "METADATA");
if (!metadatadir.exists()) metadatadir.mkdirs();
@ -186,6 +187,8 @@ public final class Segment {
wordCount++;
}
return wordCount;
}

Loading…
Cancel
Save