From c38c8520909e928b2ce2d298f5246c4f14cfdafb Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 2 Jun 2009 16:53:45 +0000 Subject: [PATCH] modified access method to get index entries out of an array of BLOBs: iterate over them and merge as we go, rather than collecting them all first and merging afterwards. This should use less memory and may behave better in an environment with many queries. To ensure that too many queries will not cause total blocking, a time-out of one second was also added. After the time-out, the index data that was collected so far is returned. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6013 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/kelondro/blob/BLOBArray.java | 53 ++++++++++++++++++- .../text/ReferenceContainerArray.java | 22 +++++--- source/de/anomic/kelondro/text/Segment.java | 11 ++-- 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/source/de/anomic/kelondro/blob/BLOBArray.java b/source/de/anomic/kelondro/blob/BLOBArray.java index ad74db893..5b2d8ea13 100755 --- a/source/de/anomic/kelondro/blob/BLOBArray.java +++ b/source/de/anomic/kelondro/blob/BLOBArray.java @@ -480,7 +480,8 @@ public class BLOBArray implements BLOB { * @return * @throws IOException */ - public synchronized List getAll(byte[] key) throws IOException { + public synchronized Iterable getAll(byte[] key) throws IOException { + /* byte[] b; ArrayList l = new ArrayList(blobs.size()); for (blobItem bi: blobs) { @@ -488,6 +489,56 @@ public class BLOBArray implements BLOB { if (b != null) l.add(b); } return l; + */ + return new BlobValues(key); + } + + public class BlobValues implements Iterator, Iterable { + + private Iterator bii; + private byte[] next; + private byte[] key; + + public BlobValues(byte[] key) { + this.bii = blobs.iterator(); + this.key = key; + this.next = null; + next0(); + } + + private void next0() { + while (this.bii.hasNext()) { + BLOB b = this.bii.next().blob; + try { + this.next = b.get(key); + if (this.next != null) return; + } catch (IOException
e) { + e.printStackTrace(); + this.next = null; + return; + } + } + this.next = null; + } + + public Iterator iterator() { + return this; + } + + public boolean hasNext() { + return this.next != null; + } + + public byte[] next() { + byte[] n = this.next; + next0(); + return n; + } + + public void remove() { + throw new UnsupportedOperationException("no remove in BlobValues"); + } + } /** diff --git a/source/de/anomic/kelondro/text/ReferenceContainerArray.java b/source/de/anomic/kelondro/text/ReferenceContainerArray.java index 4b0787e7d..dbeb4d6d7 100644 --- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java +++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java @@ -28,7 +28,6 @@ import java.io.File; import java.io.IOException; import java.util.Date; import java.util.Iterator; -import java.util.List; import de.anomic.kelondro.blob.BLOB; import de.anomic.kelondro.blob.BLOBArray; @@ -197,12 +196,23 @@ public final class ReferenceContainerArray { * @throws IOException */ public synchronized ReferenceContainer get(final byte[] termHash) throws IOException { - List entries = this.array.getAll(termHash); - if (entries == null || entries.size() == 0) return null; - byte[] a = entries.remove(0); + long timeout = System.currentTimeMillis() + 1000; + Iterator entries = this.array.getAll(termHash).iterator(); + if (entries == null || !entries.hasNext()) return null; + byte[] a = entries.next(); + int k = 1; ReferenceContainer c = new ReferenceContainer(this.factory, termHash, RowSet.importRowSet(a, payloadrow)); - while (entries.size() > 0) { - c = c.merge(new ReferenceContainer(this.factory, termHash, RowSet.importRowSet(entries.remove(0), payloadrow))); + if (System.currentTimeMillis() > timeout) { + Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. 
timeout = 1000"); + return c; + } + while (entries.hasNext()) { + c = c.merge(new ReferenceContainer(this.factory, termHash, RowSet.importRowSet(entries.next(), payloadrow))); + k++; + if (System.currentTimeMillis() > timeout) { + Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000"); + return c; + } } return c; } diff --git a/source/de/anomic/kelondro/text/Segment.java b/source/de/anomic/kelondro/text/Segment.java index 526aab651..5447c2fff 100644 --- a/source/de/anomic/kelondro/text/Segment.java +++ b/source/de/anomic/kelondro/text/Segment.java @@ -42,6 +42,7 @@ import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow; import de.anomic.kelondro.text.navigationPrototype.NavigationReference; import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceFactory; +import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceRow; import de.anomic.kelondro.text.referencePrototype.WordReference; import de.anomic.kelondro.text.referencePrototype.WordReferenceFactory; import de.anomic.kelondro.text.referencePrototype.WordReferenceRow; @@ -69,7 +70,7 @@ public final class Segment { private final Log log; private final IndexCell termIndex; - //private final IndexCell authorNavIndex; + private final IndexCell authorNavIndex; private final MetadataRepository urlMetadata; private final File segmentPath; private final IODispatcher merger; @@ -97,9 +98,9 @@ public final class Segment { maxFileSize, this.merger, writeBufferSize); - /* + this.authorNavIndex = new IndexCell( - new File(segmentPath, "AUTHORNAV"), + new File(new File(segmentPath, "nav_author"), "idx"), navigationReferenceFactory, wordOrder, NavigationReferenceRow.navEntryRow, @@ -108,7 +109,7 @@ public final class Segment { maxFileSize, this.merger, writeBufferSize); - */ + File metadatadir = new File(segmentPath, "METADATA"); if (!metadatadir.exists()) metadatadir.mkdirs(); @@ -186,6 +187,8 
@@ public final class Segment { wordCount++; } + + return wordCount; }