From a6a3f4b694d3720baab057b103add705de09b727 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 15 Mar 2006 16:01:42 +0000
Subject: [PATCH] fix for svn 1888: this is a redesign of the no-iterator
 solution

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1892 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/IndexControl_p.java                    |  2 +-
 source/de/anomic/plasma/plasmaDHTChunk.java   | 82 +------------------
 source/de/anomic/plasma/plasmaWordIndex.java  | 20 ++---
 .../anomic/plasma/plasmaWordIndexCache.java   |  6 +-
 4 files changed, 17 insertions(+), 93 deletions(-)
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 5d400b009..6d99487e1 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -284,7 +284,7 @@ public class IndexControl_p {
 
         // generate list
         if (post.containsKey("keyhashsimilar")) {
-            final Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, plasmaWordIndex.RL_WORDFILES, true);
+            final Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, plasmaWordIndex.RL_WORDFILES, true, 256).iterator();
             StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:<br>");
             String hash;
             int i = 0;
diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java
index 253b73adc..31407ffc9 100644
--- a/source/de/anomic/plasma/plasmaDHTChunk.java
+++ b/source/de/anomic/plasma/plasmaDHTChunk.java
@@ -169,88 +169,12 @@ public class plasmaDHTChunk {
         return;
     }
 
-    private int  selectTransferContainersResource(String hash, int resourceLevel, int maxcount) {
-        // the hash is a start hash from where the indexes are picked
-        ArrayList tmpContainers = new ArrayList(maxcount);
-        try {
-            String[] wordHashes = wordIndex.wordHashes(hash, resourceLevel, true, maxcount);
-            plasmaWordIndexEntryContainer indexContainer;
-            Iterator urlIter;
-            plasmaWordIndexEntry indexEntry;
-            plasmaCrawlLURL.Entry lurl;
-            int refcount = 0;
-
-            urlCache = new HashMap();
-            while ((maxcount > refcount) && ((tmpContainers.size() == 0) || (yacyDHTAction.dhtDistance(wordHashes[refcount], ((plasmaWordIndexEntryContainer) tmpContainers.get(0)).wordHash()) < 0.2))) {
-                // make an on-the-fly entity and insert values
-                indexContainer = wordIndex.getContainer(wordHashes[refcount], true, 10000);
-                int notBoundCounter = 0;
-                try {
-                    urlIter = indexContainer.entries();
-                    // iterate over indexes to fetch url entries and store them in the urlCache
-                    while ((urlIter.hasNext()) && (maxcount > refcount)) {
-                        indexEntry = (plasmaWordIndexEntry) urlIter.next();
-                        try {
-                            lurl = lurls.getEntry(indexEntry.getUrlHash(), indexEntry);
-                            if ((lurl == null) || (lurl.url() == null)) {
-                                notBoundCounter++;
-                                urlIter.remove();
-                                wordIndex.removeEntries(wordHashes[refcount], new String[] { indexEntry.getUrlHash() }, true);
-                            } else {
-                                urlCache.put(indexEntry.getUrlHash(), lurl);
-                                refcount++;
-                            }
-                        } catch (IOException e) {
-                            notBoundCounter++;
-                            urlIter.remove();
-                            wordIndex.removeEntries(wordHashes[refcount], new String[] { indexEntry.getUrlHash() }, true);
-                        }
-                    }
-
-                    // remove all remaining; we have enough
-                    while (urlIter.hasNext()) {
-                        indexEntry = (plasmaWordIndexEntry) urlIter.next();
-                        urlIter.remove();
-                    }
-
-                    // use whats left
-                    log.logFine("Selected partial index (" + indexContainer.size() + " from " + wordIndex.indexSize(wordHashes[refcount-1]) + " URLs, " + notBoundCounter + " not bound) for word " + indexContainer.wordHash());
-                    tmpContainers.add(indexContainer);
-                } catch (kelondroException e) {
-                    log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + wordHashes[refcount], e);
-                    wordIndex.deleteIndex(wordHashes[refcount]);
-                }
-            }
-            // create result
-            indexContainers = (plasmaWordIndexEntryContainer[]) tmpContainers.toArray(new plasmaWordIndexEntryContainer[tmpContainers.size()]);
-
-            if ((indexContainers == null) || (indexContainers.length == 0)) {
-                log.logFine("No index available for index transfer, hash start-point " + startPointHash);
-                this.status = chunkStatus_FAILED;
-                return 0;
-            }
-
-            this.status = chunkStatus_FILLED;
-            
-            return refcount;
-        } catch (kelondroException e) {
-            log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
-            indexContainers = new plasmaWordIndexEntryContainer[0];
-            urlCache = new HashMap();
-            
-            this.status = chunkStatus_FAILED;
-            
-            return 0;
-        }
-    }
-
-    /*
-    private int  selectTransferContainersResource(String hash, int resourceLevel, int maxcount) {
+    private int selectTransferContainersResource(String hash, int resourceLevel, int maxcount) {
         // the hash is a start hash from where the indexes are picked
         ArrayList tmpContainers = new ArrayList(maxcount);
         String nexthash = "";
         try {
-            Iterator wordHashIterator = wordIndex.wordHashes(hash, resourceLevel, true);
+            Iterator wordHashIterator = wordIndex.wordHashes(hash, resourceLevel, true, maxcount).iterator();
             plasmaWordIndexEntryContainer indexContainer;
             Iterator urlIter;
             plasmaWordIndexEntry indexEntry;
@@ -321,7 +245,7 @@ public class plasmaDHTChunk {
             return 0;
         }
     }
-    */
+    
     
     public int deleteTransferIndexes() {
         Iterator urlIter;
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index ba738c65e..f190b32d8 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -54,6 +54,7 @@ import java.util.Map;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.Date;
+import java.util.TreeSet;
 import java.net.URL;
 
 import de.anomic.htmlFilter.htmlFilterContentScraper;
@@ -380,20 +381,15 @@ public final class plasmaWordIndex {
     public static final int RL_ASSORTMENTS = 2;
     public static final int RL_WORDFILES   = 3;
     
-    public synchronized String[] wordHashes(String startHash, int resourceLevel, boolean rot, int count) {
-        String[] hashes = new String[count];
+    public synchronized TreeSet wordHashes(String startHash, int resourceLevel, boolean rot, int count) {
+        TreeSet hashes = new TreeSet();
         Iterator i = wordHashes(startHash, resourceLevel, rot);
-        int j = 0;
-        while ((count-- > 0) && (i.hasNext())) {
-            hashes[j++] = (String) i.next();
-        }
-        if (count > 0) {
-            String[] s = new String[j];
-            System.arraycopy(hashes, 0, s, 0, j);
-            return s;
-        } else {
-            return hashes;
+        String hash;
+        while ((hashes.size() < count) && (i.hasNext())) {
+            hash = (String) i.next();
+            if ((hash != null) && (hash.length() > 0)) hashes.add(hash);
         }
+        return hashes;
     }
     
     public Iterator wordHashes(String startHash, int resourceLevel, boolean rot) {
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index c0fab2c2c..84e6eb3de 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -56,7 +56,7 @@ import de.anomic.kelondro.kelondroRecords;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySeedDB;
 
-public final class plasmaWordIndexCache /*implements plasmaWordIndexInterface*/ {
+public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 
     // environment constants
     private static final String indexArrayFileName = "indexDump1.array";
@@ -277,6 +277,10 @@ public final class plasmaWordIndexCache /*implements plasmaWordIndexInterface*/
         return kCache.size();
     }
 
+    public int size() {
+        return wCache.size() + kCache.size();
+    }
+
     public int indexSize(String wordHash) {
         int size = 0;
         plasmaWordIndexEntryContainer cacheIndex = (plasmaWordIndexEntryContainer) wCache.get(wordHash);