modified the access method that gets index entries out of an array of BLOBs:

iterate over them and merge each entry as it is read, instead of collecting all of them first and merging afterwards. This should use less memory and may behave better in an environment with many queries. To ensure that too many queries will not cause total blocking, a time-out of one second was also added. After the time-out, the index data that has been collected so far is returned. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6013 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · c38c852090
parent 55ff919b5d
commit c38c852090
3 changed files with 75 additions and 11 deletions
--- a/source/de/anomic/kelondro/blob/BLOBArray.java
+++ b/source/de/anomic/kelondro/blob/BLOBArray.java
@ -480,7 +480,8 @@ public class BLOBArray implements BLOB {
     * @return
     * @throws IOException
     */
-    public synchronized List<byte[]> getAll(byte[] key) throws IOException {
+    public synchronized Iterable<byte[]> getAll(byte[] key) throws IOException {
+        /*
        byte[] b;
        ArrayList<byte[]> l = new ArrayList<byte[]>(blobs.size());
        for (blobItem bi: blobs) {
@ -488,6 +489,56 @@ public class BLOBArray implements BLOB {
            if (b != null) l.add(b);
        }
        return l;
+        */
+        return new BlobValues(key);
+    }
+    
+    /**
+     * Lazy, single-pass view over the values stored under one key in the
+     * BLOBs of the enclosing array.
+     *
+     * The BLOBs are queried one at a time while the caller iterates; BLOBs
+     * that return null for the key are skipped. The instance is its own
+     * iterator, so {@link #iterator()} can be consumed only once.
+     *
+     * If reading a BLOB throws an IOException, the stack trace is printed and
+     * the iteration ends at that point; values in the remaining BLOBs are not
+     * delivered.
+     *
+     * NOTE(review): the values are fetched after getAll() has returned, i.e.
+     * outside of its synchronized section - confirm that the blobs collection
+     * cannot be modified while a caller is still iterating.
+     */
+    public class BlobValues implements Iterator<byte[]>, Iterable<byte[]> {
+
+        private final Iterator<blobItem> bii;
+        private final byte[] key;
+        private byte[] next; // value handed out by the next call to next(); null when exhausted
+
+        /**
+         * Starts the iteration and pre-fetches the first value.
+         * @param key the key to look up in every BLOB
+         */
+        public BlobValues(byte[] key) {
+            this.bii = blobs.iterator();
+            this.key = key;
+            this.next = null;
+            next0();
+        }
+
+        /**
+         * Advances to the next BLOB that has a non-null value for the key and
+         * stores that value in this.next; sets this.next to null if there is
+         * no such BLOB left or if a read fails.
+         */
+        private void next0() {
+            while (this.bii.hasNext()) {
+                BLOB b = this.bii.next().blob;
+                try {
+                    this.next = b.get(key);
+                    if (this.next != null) return;
+                } catch (IOException e) {
+                    // best effort: report the problem and end the iteration here
+                    e.printStackTrace();
+                    this.next = null;
+                    return;
+                }
+            }
+            this.next = null;
+        }
+
+        /**
+         * @return this object; the iteration cannot be restarted
+         */
+        public Iterator<byte[]> iterator() {
+            return this;
+        }
+
+        public boolean hasNext() {
+            return this.next != null;
+        }
+
+        /**
+         * @return the pre-fetched value
+         * @throws java.util.NoSuchElementException if no value is left
+         */
+        public byte[] next() {
+            // honour the Iterator contract instead of silently returning null
+            if (this.next == null) {
+                throw new java.util.NoSuchElementException("no more values in BlobValues");
+            }
+            byte[] n = this.next;
+            next0();
+            return n;
+        }
+
+        public void remove() {
+            throw new UnsupportedOperationException("no remove in BlobValues");
+        }
+
+    }
    
    /**
--- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java
+++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java
@ -28,7 +28,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Date;
 import java.util.Iterator;
-import java.util.List;

 import de.anomic.kelondro.blob.BLOB;
 import de.anomic.kelondro.blob.BLOBArray;
@ -197,12 +196,23 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
     * @throws IOException 
     */
    public synchronized ReferenceContainer<ReferenceType> get(final byte[] termHash) throws IOException {
-    	List<byte[]> entries = this.array.getAll(termHash);
-    	if (entries == null || entries.size() == 0) return null;
-    	byte[] a = entries.remove(0);
+        long timeout = System.currentTimeMillis() + 1000;
+        Iterator<byte[]> entries = this.array.getAll(termHash).iterator();
+    	if (entries == null || !entries.hasNext()) return null;
+    	byte[] a = entries.next();
+    	int k = 1;
    	ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(a, payloadrow));
-    	while (entries.size() > 0) {
-    		c = c.merge(new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(entries.remove(0), payloadrow)));
+    	if (System.currentTimeMillis() > timeout) {
+    	    Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
+    	    return c;
+    	}
+    	while (entries.hasNext()) {
+    		c = c.merge(new ReferenceContainer<ReferenceType>(this.factory, termHash, RowSet.importRowSet(entries.next(), payloadrow)));
+    		k++;
+    		if (System.currentTimeMillis() > timeout) {
+    		    Log.logWarning("ReferenceContainerArray", "timout in index retrieval: " + k + " tables searched. timeout = 1000");
+                return c;
+            }
    	}
    	return c;
    }
--- a/source/de/anomic/kelondro/text/Segment.java
+++ b/source/de/anomic/kelondro/text/Segment.java
@ -42,6 +42,7 @@ import de.anomic.kelondro.order.ByteOrder;
 import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow;
 import de.anomic.kelondro.text.navigationPrototype.NavigationReference;
 import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceFactory;
+import de.anomic.kelondro.text.navigationPrototype.NavigationReferenceRow;
 import de.anomic.kelondro.text.referencePrototype.WordReference;
 import de.anomic.kelondro.text.referencePrototype.WordReferenceFactory;
 import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
@ -69,7 +70,7 @@ public final class Segment {
    
    private final Log                            log;
    private final IndexCell<WordReference>       termIndex;
-    //private final IndexCell<NavigationReference> authorNavIndex;
+    private final IndexCell<NavigationReference> authorNavIndex;
    private final MetadataRepository             urlMetadata;
    private final File                           segmentPath;
    private final IODispatcher                   merger;
@ -97,9 +98,9 @@ public final class Segment {
                maxFileSize,
                this.merger,
                writeBufferSize);
-        /*
+        
        this.authorNavIndex = new IndexCell<NavigationReference>(
-                new File(segmentPath, "AUTHORNAV"),
+                new File(new File(segmentPath, "nav_author"), "idx"),
                navigationReferenceFactory,
                wordOrder,
                NavigationReferenceRow.navEntryRow,
@ -108,7 +109,7 @@ public final class Segment {
                maxFileSize,
                this.merger,
                writeBufferSize);
-        */
+        
        File metadatadir = new File(segmentPath, "METADATA");
        if (!metadatadir.exists()) metadatadir.mkdirs();
        
@ -186,6 +187,8 @@ public final class Segment {
            wordCount++;
        }
        
+        
+        
        return wordCount;
    }