- removed some usages of indexEntity

- changed the index collection process: indexes are no longer flushed to indexEntity
  first, but are now collected directly from the RAM cache and the assortments
  (see the sketch below)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1489 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 09dc7bbcd7
commit fa90c3ca7a
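
For illustration, a minimal sketch of the new access pattern, using the class and method names that appear in the diff below (the surrounding wiring is assumed):

    // Instead of opening an on-disk plasmaWordIndexEntity, copying its
    // entries and closing it in a finally block, callers now ask the word
    // index for a merged in-memory container and simply iterate it.
    plasmaWordIndexEntryContainer container =
        switchboard.wordIndex.getContainer(wordHash, true, -1);
    Iterator en = container.entries();
    while (en.hasNext()) {
        plasmaWordIndexEntry entry = (plasmaWordIndexEntry) en.next();
        // ... use entry.getUrlHash() etc.; no close() is needed afterwards
    }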

@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.423
releaseVersion=0.424
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@@ -60,7 +60,6 @@ import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.plasma.plasmaWordIndexEntity;
import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverObjects;
@@ -150,22 +149,15 @@ public class IndexControl_p {
if (post.containsKey("keyhashdeleteall")) {
if (delurl || delurlref) {
// generate an urlx array
plasmaWordIndexEntity index = null;
try {
index = switchboard.wordIndex.getEntity(keyhash, true, -1);
Iterator en = index.elements(true);
plasmaWordIndexEntryContainer index = null;
index = switchboard.wordIndex.getContainer(keyhash, true, -1);
Iterator en = index.entries();
int i = 0;
urlx = new String[index.size()];
while (en.hasNext()) {
urlx[i++] = ((plasmaWordIndexEntry) en.next()).getUrlHash();
}
index.close();
index = null;
} catch (IOException e) {
urlx = new String[0];
} finally {
if (index != null) try { index.close(); } catch (Exception e) {}
}
}
if (delurlref) {
for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
@@ -256,12 +248,12 @@ public class IndexControl_p {
}
prop.put("urlstring", "");
prop.put("urlhash", "");
plasmaWordIndexEntryContainer[] indexes = new plasmaWordIndexEntryContainer[1];
plasmaWordIndexEntryContainer index;
String result;
long starttime = System.currentTimeMillis();
indexes[0] = switchboard.wordIndex.getContainer(keyhash, true, -1);
index = switchboard.wordIndex.getContainer(keyhash, true, -1);
// built urlCache
Iterator urlIter = indexes[0].entries();
Iterator urlIter = index.entries();
HashMap knownURLs = new HashMap();
HashSet unknownURLEntries = new HashSet();
plasmaWordIndexEntry indexEntry;
@@ -271,8 +263,8 @@ public class IndexControl_p {
try {
lurl = switchboard.urlPool.loadedURL.getEntry(indexEntry.getUrlHash(), null);
if (lurl.toString() == null) {
switchboard.urlPool.loadedURL.remove(indexEntry.getUrlHash());
unknownURLEntries.add(indexEntry.getUrlHash());
urlIter.remove();
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
}
@@ -280,23 +272,17 @@ public class IndexControl_p {
unknownURLEntries.add(indexEntry.getUrlHash());
}
}
// now delete all entries that have no url entry
Iterator hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
indexes[0].remove((String) hashIter.next());
}
// use what's remaining
String gzipBody = switchboard.getConfig("indexControl.gzipBody","false");
int timeout = (int) switchboard.getConfigLong("indexControl.timeout",60000);
result = yacyClient.transferIndex(
yacyCore.seedDB.getConnected(post.get("hostHash", "")),
indexes,
new plasmaWordIndexEntryContainer[]{index},
knownURLs,
"true".equalsIgnoreCase(gzipBody),
timeout);
prop.put("result", (result == null) ? ("Successfully transferred " + indexes[0].size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds") : result);
indexes[0] = null;
indexes = null;
prop.put("result", (result == null) ? ("Successfully transferred " + index.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds") : result);
index = null;
}
// generate list

@@ -47,7 +47,6 @@
// if the shell's current path is HTROOT
import java.util.Date;
import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
@@ -86,16 +85,7 @@ public final class query {
if (obj.equals("rwiurlcount")) {
// the total number of different urls in the rwi is returned
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
de.anomic.plasma.plasmaWordIndexEntity entity = null;
try {
entity = sb.wordIndex.getEntity(env, true, -1);
prop.put("response", entity.size());
entity.close();
} catch (IOException e) {
prop.put("response", -1);
} finally {
if (entity != null) try { entity.close(); } catch (Exception e) {}
}
prop.put("response", sb.wordIndex.indexSize(env));
return prop;
}
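
The handler above shrinks from an entity open/close with IOException handling to a single aggregate call; a condensed sketch of the resulting core (prop and sb as in the diff):

    // env carries the word hash; indexSize() sums the counts from the RAM
    // cache, the assortments and the file back-end, so there is no file
    // handle left to leak and no -1 error path for IOExceptions
    prop.put("response", sb.wordIndex.indexSize(env));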

@@ -225,22 +225,16 @@ public class plasmaDbImporter extends Thread {
Iterator importWordHashIterator = this.importWordIndex.wordHashes(wordChunkStartHash, true, true);
while (!isAborted() && importWordHashIterator.hasNext()) {
plasmaWordIndexEntity importWordIdxEntity = null;
plasmaWordIndexEntryContainer newContainer;
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
newContainer = importWordIndex.getContainer(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();
continue;
}
// creating a container used to hold the imported entries
plasmaWordIndexEntryContainer newContainer = new plasmaWordIndexEntryContainer(wordHash,importWordIdxEntity.size());
if (newContainer.size() == 0) continue;
// the combined container will fit, read the container
Iterator importWordIdxEntries = importWordIdxEntity.elements(true);
Iterator importWordIdxEntries = newContainer.entries();
plasmaWordIndexEntry importWordIdxEntry;
while (importWordIdxEntries.hasNext()) {
@@ -262,9 +256,6 @@ public class plasmaDbImporter extends Thread {
}
} catch (IOException e) {}
// adding word index entity to container
newContainer.add(importWordIdxEntry,System.currentTimeMillis());
if (entryCounter % 500 == 0) {
this.log.logFine(entryCounter + " word entries and " + wordCounter + " word entities processed so far.");
}
@@ -277,7 +268,6 @@ public class plasmaDbImporter extends Thread {
homeWordIndex.addEntries(newContainer, true);
// delete complete index entity file
importWordIdxEntity.close();
importWordIndex.deleteIndex(wordHash);
// print out some statistical information
@@ -300,7 +290,6 @@ public class plasmaDbImporter extends Thread {
} catch (Exception e) {
log.logSevere("Import of word entity '" + wordHash + "' failed.",e);
} finally {
if (importWordIdxEntity != null) try { importWordIdxEntity.close(); } catch (Exception e) {}
}
}

@@ -551,8 +551,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
getConfig("allowDistributeIndex", "false").equalsIgnoreCase("true"),
getConfig("allowDistributeIndexWhileCrawling","false").equalsIgnoreCase("true"),
getConfig("indexDistribution.gzipBody","false").equalsIgnoreCase("true"),
(int)getConfigLong("indexDistribution.timeout",60000),
(int)getConfigLong("indexDistribution.maxOpenFiles",800)
(int)getConfigLong("indexDistribution.timeout",60000) /*,
(int)getConfigLong("indexDistribution.maxOpenFiles",800)*/
);
indexDistribution.setCounts(150, 1, 3, 10000);
deployThread("20_dhtdistribution", "DHT Distribution", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", null,
@@ -1353,7 +1353,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
words = condenser.RESULT_SIMI_WORDS;
// transfering the index to the storage peer
String error = yacyClient.transferIndex(seed,(plasmaWordIndexEntryContainer[])tmpContainers.toArray(new plasmaWordIndexEntity[tmpContainers.size()]),urlCache,true,120000);
String error = yacyClient.transferIndex(
seed,
(plasmaWordIndexEntryContainer[])tmpContainers.toArray(new plasmaWordIndexEntryContainer[tmpContainers.size()]),
urlCache,
true,
120000);
if (error != null) {
words = wordIndex.addPageIndex(entry.url(), urlHash, docDate, (int) entry.size(), condenser, plasmaWordIndexEntry.language(entry.url()), plasmaWordIndexEntry.docType(document.getMimeType()));

@@ -178,6 +178,10 @@ public final class plasmaWordIndex {
return condenser.RESULT_SIMI_WORDS;
}
public int indexSize(String wordHash) {
return ramCache.indexSize(wordHash);
}
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
return ramCache.getContainer(wordHash, deleteIfEmpty, maxTime);
}
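
plasmaWordIndex thus stays a thin facade: both new methods delegate to the RAM cache layer, which merges the assortments and the file back-end underneath. A hedged usage sketch (switchboard wiring assumed):

    int urls = switchboard.wordIndex.indexSize(wordHash);       // aggregated count
    plasmaWordIndexEntryContainer c =
        switchboard.wordIndex.getContainer(wordHash, true, -1); // merged entries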

@@ -173,6 +173,23 @@ public final class plasmaWordIndexAssortment {
return row2container(wordHash, row);
}
public boolean contains(String wordHash) {
// gets a word index from assortment database
// and returns the content record
byte[][] row = null;
try {
row = assortments.get(wordHash.getBytes());
return (row != null);
} catch (IOException e) {
return false;
} catch (kelondroException e) {
log.logSevere("removeAssortment/kelondro-error: " + e.getMessage()
+ " - reset assortment-DB " + assortments.file(), e);
resetDatabase();
return false;
}
}
public plasmaWordIndexEntryContainer get(String wordHash) {
// gets a word index from assortment database
// and returns the content record
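
The new contains() answers the membership question with a single raw row lookup, without materializing a plasmaWordIndexEntryContainer; the cluster's indexSize() in the next hunk relies on exactly this cheap test. A small usage sketch (the assortment instance name is an assumption):

    // true iff the word hash has a row in this assortment file
    if (assortment.contains(wordHash)) {
        // count it without reading or decoding the entry payload
    }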

@@ -226,6 +226,14 @@ public final class plasmaWordIndexAssortmentCluster {
return record;
}
public int indexSize(String wordHash) {
int size = 0;
for (int i = 0; i < clusterCount; i++) {
if (assortments[i].contains(wordHash)) size += i + 1;
}
return size;
}
public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) {
HashSet iterators = new HashSet();
//if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!");
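
The arithmetic leans on the cluster layout implied by the code above: assortment i stores, for each word, a row of exactly i + 1 entries, so a hit in assortment i contributes i + 1 to the total. A worked example under that assumption (cluster is a hypothetical instance):

    // word hash present in assortment 0 (1 entry) and assortment 2 (3 entries):
    int size = cluster.indexSize(wordHash); // (0 + 1) + (2 + 1) = 4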

@@ -258,6 +258,21 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return java.lang.Math.max(assortmentCluster.sizeTotal(), java.lang.Math.max(backend.size(), cache.size()));
}
public int indexSize(String wordHash) {
int size = 0;
try {
plasmaWordIndexEntity entity = backend.getEntity(wordHash, true, -1);
if (entity != null) {
size += entity.size();
entity.close();
}
} catch (IOException e) {}
size += assortmentCluster.indexSize(wordHash);
TreeMap cacheIndex = (TreeMap) cache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
public Iterator wordHashes(String startWordHash, boolean up) {
// Old convention implies rot = true
//return new rotatingWordHashes(startWordHash, up);
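
This method is the collection change from the commit message in one place: the per-word count is the sum over the file back-end, the assortment cluster and the in-memory cache, with no temporary entity in between. A worked example with assumed layer counts (wordIndexCache is a hypothetical instance):

    // back-end entity: 10 entries, assortment cluster: 4, RAM cache: 2
    int size = wordIndexCache.indexSize(wordHash); // 10 + 4 + 2 = 16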

@@ -47,7 +47,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.HashSet;
import java.util.HashMap;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@@ -77,8 +76,6 @@ public final class plasmaWordIndexDistribution {
private boolean closed;
private boolean gzipBody4Distribution;
private int timeout4Distribution;
private int maxOpenFiles4Distribution;
public transferIndexThread transferIdxThread = null;
public plasmaWordIndexDistribution(
@@ -88,8 +85,7 @@ public final class plasmaWordIndexDistribution {
boolean enable,
boolean enabledWhileCrawling,
boolean gzipBody,
int timeout,
int maxOpenFiles
int timeout
) {
this.urlPool = urlPool;
this.wordIndex = wordIndex;
@@ -100,7 +96,6 @@ public final class plasmaWordIndexDistribution {
setCounts(100 /*indexCount*/, 1 /*juniorPeerCount*/, 3 /*seniorPeerCount*/, 8000);
this.gzipBody4Distribution = gzipBody;
this.timeout4Distribution = timeout;
this.maxOpenFiles4Distribution = maxOpenFiles;
}
public void enable() {
@@ -201,9 +196,8 @@ public final class plasmaWordIndexDistribution {
// collect index
String startPointHash = selectTransferStart();
log.logFine("Selected hash " + startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, startPointHash));
Object[] selectResult = selectTransferContainers(startPointHash, indexCount, this.maxOpenFiles4Distribution);
Object[] selectResult = selectTransferContainers(startPointHash, indexCount);
plasmaWordIndexEntryContainer[] indexContainers = (plasmaWordIndexEntryContainer[]) selectResult[0];
//Integer openedFiles = (Integer) selectResult[2];
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
if ((indexContainers == null) || (indexContainers.length == 0)) {
log.logFine("No index available for index transfer, hash start-point " + startPointHash);
@@ -267,7 +261,12 @@ public final class plasmaWordIndexDistribution {
return -1; // interrupted
}
start = System.currentTimeMillis();
error = yacyClient.transferIndex(seeds[i], indexContainers, urlCache, this.gzipBody4Distribution, this.timeout4Distribution);
error = yacyClient.transferIndex(
seeds[i],
indexContainers,
urlCache,
this.gzipBody4Distribution,
this.timeout4Distribution);
if (error == null) {
log.logInfo("Index transfer of " + indexCount + " words [" + indexContainers[0].wordHash() + " .. " + indexContainers[indexContainers.length - 1].wordHash() + "] to peer " + seeds[i].getName() + ":" + seeds[i].hash + " in " + ((System.currentTimeMillis() - start) / 1000)
+ " seconds successfull (" + (1000 * indexCount / (System.currentTimeMillis() - start + 1)) + " words/s)");
@@ -285,18 +284,9 @@ public final class plasmaWordIndexDistribution {
if (hc1 >= peerCount) {
// success
if (delete) {
try {
if (deleteTransferIndexes(indexContainers)) {
log.logFine("Deleted all " + indexContainers.length + " transferred whole-word indexes locally");
int deletedURLs = deleteTransferIndexes(indexContainers);
log.logFine("Deleted from " + indexContainers.length + " transferred RWIs locally, removed " + deletedURLs + " URL references");
return indexCount;
} else {
log.logSevere("Deleted not all transferred whole-word indexes");
return -1;
}
} catch (IOException ee) {
log.logSevere("Deletion of indexes not possible:" + ee.getMessage(), ee);
return -1;
}
} else {
// simply close the indexEntities
closeTransferIndexes(indexContainers);
@@ -323,86 +313,67 @@ public final class plasmaWordIndexDistribution {
}
Object[] /* of {plasmaWordIndexEntryContainer[], HashMap(String, plasmaCrawlLURL.Entry)}*/
selectTransferContainers(String hash, int count, int maxOpenFiles) {
selectTransferContainers(String hash, int count) {
// the hash is a start hash from where the indexes are picked
ArrayList tmpContainers = new ArrayList(count);
String nexthash = "";
try {
int currOpenFiles = 0;
Iterator wordHashIterator = this.wordIndex.wordHashes(hash, true, true);
plasmaWordIndexEntity indexEntity;
plasmaWordIndexEntryContainer indexContainer;
Iterator urlIter;
Iterator hashIter;
plasmaWordIndexEntry indexEntry;
plasmaCrawlLURL.Entry lurl;
final HashSet unknownURLEntries = new HashSet();
int notBoundCounter = 0;
final HashMap knownURLs = new HashMap();
while (
(count > 0) &&
(currOpenFiles < maxOpenFiles) &&
(wordHashIterator.hasNext()) &&
((nexthash = (String) wordHashIterator.next()) != null) &&
(nexthash.trim().length() > 0) &&
((currOpenFiles == 0) ||
(yacyDHTAction.dhtDistance(nexthash, ((plasmaWordIndexEntity)tmpContainers.get(0)).wordHash()) < 0.2))
((tmpContainers.size() == 0) ||
(yacyDHTAction.dhtDistance(nexthash, ((plasmaWordIndexEntryContainer)tmpContainers.get(0)).wordHash()) < 0.2))
) {
indexEntity = this.wordIndex.getEntity(nexthash, true, -1);
if (indexEntity.size() == 0) {
indexEntity.deleteComplete();
} else {
// make an on-the-fly entity and insert values
indexContainer = new plasmaWordIndexEntryContainer(indexEntity.wordHash());
indexContainer = this.wordIndex.getContainer(nexthash, true, 10000);
try {
urlIter = indexEntity.elements(true);
unknownURLEntries.clear();
urlIter = indexContainer.entries();
// iterate over indexes to fetch url entries and store them in the urlCache
while ((urlIter.hasNext()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlIter.next();
try {
lurl = this.urlPool.loadedURL.getEntry(indexEntry.getUrlHash(), indexEntry);
if ((lurl == null) || (lurl.url() == null)) {
unknownURLEntries.add(indexEntry.getUrlHash());
notBoundCounter++;
urlIter.remove();
this.wordIndex.removeEntries(nexthash, new String[]{indexEntry.getUrlHash()}, true);
} else {
knownURLs.put(indexEntry.getUrlHash(), lurl);
indexContainer.add(indexEntry);
count--;
}
} catch (IOException e) {
unknownURLEntries.add(indexEntry.getUrlHash());
notBoundCounter++;
urlIter.remove();
this.wordIndex.removeEntries(nexthash, new String[]{indexEntry.getUrlHash()}, true);
}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
String nextUrlHash = (String) hashIter.next();
indexEntity.removeEntry(nextUrlHash, true);
this.urlPool.loadedURL.remove(nextUrlHash);
}
// deleting entity if there are no more entries left
// This could occur if there are unknownURLs in the entity
if (indexEntity.size() == 0) {
indexEntity.deleteComplete();
// remove all remaining; we have enough
while (urlIter.hasNext()) {
indexEntry = (plasmaWordIndexEntry) urlIter.next();
urlIter.remove();
}
// use what's remaining
this.log.logFine("Selected partial index (" + indexContainer.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + indexContainer.wordHash());
// use what's left
this.log.logFine("Selected partial index (" + indexContainer.size() + " from " + this.wordIndex.indexSize(nexthash) +" URLs, " + notBoundCounter + " not bound) for word " + indexContainer.wordHash());
tmpContainers.add(indexContainer);
} catch (kelondroException e) {
this.log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash(), e);
indexEntity.deleteComplete();
}
indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards
indexEntity = null;
this.log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + nexthash, e);
this.wordIndex.deleteIndex(nexthash);
}
}
// transfer to array
plasmaWordIndexEntryContainer[] entryContainers = (plasmaWordIndexEntryContainer[]) tmpContainers.toArray(new plasmaWordIndexEntryContainer[tmpContainers.size()]);
return new Object[]{entryContainers, knownURLs, new Integer(currOpenFiles)};
} catch (IOException e) {
this.log.logSevere("selectTransferIndexes IO-Error (hash=" + nexthash + "): " + e.getMessage(), e);
return new Object[]{new plasmaWordIndexEntity[0], new HashMap(0)};
return new Object[]{entryContainers, knownURLs};
} catch (kelondroException e) {
this.log.logSevere("selectTransferIndexes database corrupted: " + e.getMessage(), e);
return new Object[]{new plasmaWordIndexEntity[0], new HashMap(0)};
@@ -443,13 +414,11 @@ public final class plasmaWordIndexDistribution {
}
}
boolean deleteTransferIndexes(plasmaWordIndexEntryContainer[] indexContainers) throws IOException {
int deleteTransferIndexes(plasmaWordIndexEntryContainer[] indexContainers) {
Iterator urlIter;
plasmaWordIndexEntry indexEntry;
plasmaWordIndexEntity indexEntity;
String[] urlHashes;
int sz;
boolean success = true;
int count = 0;
for (int i = 0; i < indexContainers.length; i++) {
// delete entries separately
int c = 0;
@@ -459,15 +428,11 @@ public final class plasmaWordIndexDistribution {
indexEntry = (plasmaWordIndexEntry) urlIter.next();
urlHashes[c++] = indexEntry.getUrlHash();
}
wordIndex.removeEntries(indexContainers[i].wordHash(), urlHashes, true);
indexEntity = wordIndex.getEntity(indexContainers[i].wordHash(), true, -1);
sz = indexEntity.size();
// indexEntity.close();
closeTransferIndex(indexEntity);
log.logFine("Deleted partial index (" + c + " URLs) for word " + indexContainers[i].wordHash() + "; " + sz + " entries left");
count += wordIndex.removeEntries(indexContainers[i].wordHash(), urlHashes, true);
log.logFine("Deleted partial index (" + c + " URLs) for word " + indexContainers[i].wordHash() + "; " + this.wordIndex.indexSize(indexContainers[i].wordHash()) + " entries left");
indexContainers[i] = null;
}
return success;
return count;
}
/*
@@ -706,7 +671,6 @@ public final class plasmaWordIndexDistribution {
}
}
}
}
public class transferIndexThread extends Thread {
@@ -715,7 +679,6 @@ public final class plasmaWordIndexDistribution {
private boolean finished = false;
private boolean gzipBody4Transfer = false;
private int timeout4Transfer = 60000;
private int maxOpenFiles4Transfer = 800;
private int transferedEntryCount = 0;
private int transferedEntityCount = 0;
private String status = "Running";
@@ -734,7 +697,7 @@ public final class plasmaWordIndexDistribution {
this.initialWordsDBSize = sb.wordIndex.size();
this.gzipBody4Transfer = "true".equalsIgnoreCase(sb.getConfig("indexTransfer.gzipBody","false"));
this.timeout4Transfer = (int) sb.getConfigLong("indexTransfer.timeout",60000);
this.maxOpenFiles4Transfer = (int) sb.getConfigLong("indexTransfer.maxOpenFiles",800);
//this.maxOpenFiles4Transfer = (int) sb.getConfigLong("indexTransfer.maxOpenFiles",800);
}
public void run() {
@@ -821,7 +784,6 @@ public final class plasmaWordIndexDistribution {
*/
long selectionStart = System.currentTimeMillis(), selectionEnd = 0, selectionTime = 0, iteration = 0;
Integer openedFiles = new Integer(0);
while (!finished && !Thread.currentThread().isInterrupted()) {
iteration++;
int idxCount = 0;
@@ -830,10 +792,9 @@ public final class plasmaWordIndexDistribution {
// selecting 500 words to transfer
this.status = "Running: Selecting chunk " + iteration;
Object[] selectResult = selectTransferContainers(this.startPointHash, this.chunkSize, this.maxOpenFiles4Transfer - openedFiles.intValue());
Object[] selectResult = selectTransferContainers(this.startPointHash, this.chunkSize);
newIndexContainers = (plasmaWordIndexEntryContainer[]) selectResult[0];
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
openedFiles = (Integer) selectResult[2];
/* If we havn't selected a word chunk this could be because of
* a) no words are left in the index
@@ -909,17 +870,10 @@ public final class plasmaWordIndexDistribution {
// deleting transfered words from index
if (delete) {
this.status = "Running: Deleting chunk " + iteration;
try {
if (deleteTransferIndexes(oldIndexContainers)) {
plasmaWordIndexDistribution.this.log.logFine("Deleted all " + oldIndexContainers.length + " transferred whole-word indexes locally");
int urlReferences = deleteTransferIndexes(oldIndexContainers);
plasmaWordIndexDistribution.this.log.logFine("Deleted from " + oldIndexContainers.length + " transferred RWIs locally " + urlReferences + " URL references");
transferedEntryCount += idxCount;
transferedEntityCount += oldIndexContainers.length;
} else {
plasmaWordIndexDistribution.this.log.logSevere("Deleted not all transferred whole-word indexes");
}
} catch (IOException ee) {
plasmaWordIndexDistribution.this.log.logSevere("Deletion of indexes not possible:" + ee.getMessage(), ee);
}
} else {
this.closeContainers(oldIndexContainers);
transferedEntryCount += idxCount;

@@ -112,6 +112,7 @@ public final class plasmaWordIndexEntity {
}
public int size() {
if (theIndex == null) return 0;
int size = theIndex.size();
if ((size == 0) && (delete)) {
deleteComplete();

@@ -842,14 +842,23 @@ public final class yacyClient {
return null;
}
}
/*
public static byte[] singleGET(String host, int port, String path, int timeout,
String user, String password,
httpHeader requestHeader) throws IOException {
*/
public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntryContainer[] indexes, HashMap urlCache, boolean gzipBody, int timeout) {
// check if we got all necessary urls in the urlCache (only for debugging)
Iterator eenum;
plasmaWordIndexEntry entry;
for (int i = 0; i < indexes.length; i++) {
eenum = indexes[i].entries();
while (eenum.hasNext()) {
entry = (plasmaWordIndexEntry) eenum.next();
if (urlCache.get(entry.getUrlHash()) == null) {
System.out.println("DEBUG transferIndex: to-send url hash '" + entry.getUrlHash() + "' is not contained in urlCache");
}
}
}
// transfer the RWI without the URLs
HashMap in = transferRWI(targetSeed, indexes, gzipBody, timeout);
if (in == null) { return "no_connection_1"; }
String result = (String) in.get("result");
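
The debug loop above verifies, before transferRWI() is called, that every entry scheduled for transfer has its URL metadata in urlCache; missing references are thus reported on the sending side rather than coming back later as unknownURL entries from the peer. The essence of the check (names as in the diff):

    // one pass over all containers: every url hash must resolve in urlCache
    if (urlCache.get(entry.getUrlHash()) == null) {
        // log the unresolved url hash for debugging
    }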
@@ -868,7 +877,9 @@
plasmaCrawlLURL.Entry[] urls = new plasmaCrawlLURL.Entry[uhs.length];
for (int i = 0; i < uhs.length; i++) {
urls[i] = (plasmaCrawlLURL.Entry) urlCache.get(uhs[i]);
if (urls[i] == null) System.out.println("DEBUG transferIndex: error with requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
if (urls[i] == null) {
System.out.println("DEBUG transferIndex: requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
}
in = transferURL(targetSeed, urls, gzipBody, timeout);

@@ -53,6 +53,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -432,11 +433,13 @@ public final class yacy {
run.addShutdownHook(new shutdownHookThread(Thread.currentThread(), sb));
// save information about available memory after all initializations
try {
sb.setConfig("memoryFreeAfterInitBGC", Runtime.getRuntime().freeMemory());
sb.setConfig("memoryTotalAfterInitBGC", Runtime.getRuntime().totalMemory());
System.gc();
sb.setConfig("memoryFreeAfterInitAGC", Runtime.getRuntime().freeMemory());
sb.setConfig("memoryTotalAfterInitAGC", Runtime.getRuntime().totalMemory());
} catch (ConcurrentModificationException e) {}
// wait for server shutdown
try {
@@ -834,22 +837,16 @@ public final class yacy {
// testing if import process was aborted
if (Thread.interrupted()) break;
plasmaWordIndexEntity importWordIdxEntity = null;
plasmaWordIndexEntryContainer newContainer;
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();
continue;
}
newContainer = importWordIndex.getContainer(wordHash, true, -1);
// creating a container used to hold the imported entries
plasmaWordIndexEntryContainer newContainer = new plasmaWordIndexEntryContainer(wordHash,importWordIdxEntity.size());
if (newContainer.size() == 0) continue;
// the combined container will fit, read the container
Iterator importWordIdxEntries = importWordIdxEntity.elements(true);
Iterator importWordIdxEntries = newContainer.entries();
plasmaWordIndexEntry importWordIdxEntry;
while (importWordIdxEntries.hasNext()) {
@@ -871,9 +868,6 @@ public final class yacy {
}
} catch (IOException e) {}
// adding word index entity to container
newContainer.add(importWordIdxEntry,System.currentTimeMillis());
if (entryCounter % 500 == 0) {
log.logFine(entryCounter + " word entries and " + wordCounter + " word entities processed so far.");
}
@@ -886,7 +880,6 @@ public final class yacy {
homeWordIndex.addEntries(newContainer, true);
// delete complete index entity file
importWordIdxEntity.close();
importWordIndex.deleteIndex(wordHash);
// print out some statistical information
@@ -912,7 +905,6 @@ public final class yacy {
} catch (Exception e) {
log.logSevere("Import of word entity '" + wordHash + "' failed.",e);
} finally {
if (importWordIdxEntity != null) try { importWordIdxEntity.close(); } catch (Exception e) {}
}
}
