From 1f36bf4dae16d55253d2fb71f9e188324935ae29 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 20 Jul 2005 00:39:06 +0000
Subject: [PATCH] enhanced assortment capacity; added extended WORDS migration

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@412 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../plasma/plasmaWordIndexAssortment.java     | 24 ++---
 .../plasmaWordIndexAssortmentCluster.java     | 96 +++++++++++--------
 .../anomic/plasma/plasmaWordIndexCache.java   | 72 +++++++++-----
 .../plasma/plasmaWordIndexClassicDB.java      | 12 +--
 .../anomic/plasma/plasmaWordIndexEntity.java  | 25 +++++
 .../plasma/plasmaWordIndexEntryContainer.java |  5 +
 source/yacy.java                              | 53 ++++------
 7 files changed, 165 insertions(+), 122 deletions(-)

diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java
index 5ada591f6..d5449bb26 100644
--- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java
+++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java
@@ -75,7 +75,7 @@ public final class plasmaWordIndexAssortment {
     
     // class variables
     private File assortmentFile;
-    private int assortmentCapacity;
+    private int assortmentLength;
     private serverLog log;
     private kelondroTree assortments;
     private long bufferSize;
@@ -99,18 +99,18 @@ public final class plasmaWordIndexAssortment {
 	return structure;
     }
 
-    public plasmaWordIndexAssortment(File storagePath, int assortmentCapacity, int bufferkb, serverLog log) {
+    public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, serverLog log) {
 	if (!(storagePath.exists())) storagePath.mkdirs();
-	this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentCapacity) + ".db");
-	this.assortmentCapacity = assortmentCapacity;
-	this.bufferStructureLength = 3 + 2 * assortmentCapacity;
+	this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db");
+	this.assortmentLength = assortmentLength;
+	this.bufferStructureLength = 3 + 2 * assortmentLength;
         this.bufferSize = bufferkb * 1024;
         this.log = log;
         if (assortmentFile.exists()) {
             // open existing assortment tree file
             try {
                 assortments = new kelondroTree(assortmentFile, bufferSize);
-                if (log != null) log.logSystem("Opened Assortment Database, " + assortments.size() + " entries, width " + assortmentCapacity + ", " + bufferkb + "kb buffer"); 
+                if (log != null) log.logSystem("Opened Assortment Database, " + assortments.size() + " entries, width " + assortmentLength + ", " + bufferkb + "kb buffer"); 
             } catch (IOException e){
                 if (log != null) log.logError("unable to open assortment database: " + e.getMessage());
                 e.printStackTrace();
@@ -118,8 +118,8 @@ public final class plasmaWordIndexAssortment {
         } else {
             // create new assortment tree file
             try {
-                assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentCapacity));
-                if (log != null) log.logSystem("Created new Assortment Database, width " + assortmentCapacity + ", " + bufferkb + "kb buffer"); 
+                assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentLength));
+                if (log != null) log.logSystem("Created new Assortment Database, width " + assortmentLength + ", " + bufferkb + "kb buffer"); 
             } catch (IOException e){
                 if (log != null) log.logError("unable to create assortment database: " + e.getMessage());
                 e.printStackTrace();
@@ -131,14 +131,14 @@ public final class plasmaWordIndexAssortment {
         // stores a word index to assortment database
         // this throws an exception if the word hash already existed
         //log.logDebug("storeAssortment: wordHash=" + wordHash + ", urlHash=" + entry.getUrlHash() + ", time=" + creationTime);
-        if (newContainer.size() != assortmentCapacity) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size");
+        if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size");
         byte[][] row = new byte[this.bufferStructureLength][];
         row[0] = wordHash.getBytes();
         row[1] = kelondroRecords.long2bytes(1, 4);
         row[2] = kelondroRecords.long2bytes(newContainer.updated(), 8);
         Iterator entries = newContainer.entries();
 	plasmaWordIndexEntry entry;
-        for (int i = 0; i < assortmentCapacity; i++) {
+        for (int i = 0; i < assortmentLength; i++) {
             entry = (plasmaWordIndexEntry) entries.next();
 	    row[3 + 2 * i] = entry.getUrlHash().getBytes();
 	    row[4 + 2 * i] = entry.toEncodedForm(true).getBytes();
@@ -179,7 +179,7 @@ public final class plasmaWordIndexAssortment {
         long updateTime = kelondroRecords.bytes2long(row[2]);
         plasmaWordIndexEntry[] wordEntries = new plasmaWordIndexEntry[this.bufferStructureLength];
         plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
-	for (int i = 0; i < assortmentCapacity; i++) {
+	for (int i = 0; i < assortmentLength; i++) {
             container.add(new plasmaWordIndexEntry[]{new plasmaWordIndexEntry(new String(row[3 + 2 * i]), new String(row[4 + 2 * i]))}, updateTime);
 	}
         return container;
@@ -192,7 +192,7 @@ public final class plasmaWordIndexAssortment {
         } catch (IOException e) {}
         if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database");
         try {
-            assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentCapacity));
+            assortments = new kelondroTree(assortmentFile, bufferSize, bufferStructure(assortmentLength));
         } catch (IOException e){
             log.logError("unable to re-create assortment database: " + e.getMessage());
             e.printStackTrace();
diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
index 039b6dc16..b1e6a46f7 100644
--- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
+++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
@@ -57,47 +57,50 @@ public final class plasmaWordIndexAssortmentCluster {
     
     // class variables
     private File assortmentsPath;
-    private int clusterCapacity;
+    private int clusterCount;
+    public int clusterCapacity;
+    
     private serverLog log;
     private plasmaWordIndexAssortment[] assortments;
     private long completeBufferKB;
 
-    public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCapacity, int bufferkb, serverLog log) {
+    public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, int bufferkb, serverLog log) {
 	// set class variables
 	if (!(assortmentsPath.exists())) assortmentsPath.mkdirs();
-	this.clusterCapacity = clusterCapacity;
+	this.clusterCount = clusterCount;
+        this.clusterCapacity = clusterCount * (clusterCount + 1) / 2;
         this.completeBufferKB = bufferkb;
         this.log = log;
-	this.assortments = new plasmaWordIndexAssortment[clusterCapacity];
+	this.assortments = new plasmaWordIndexAssortment[clusterCount];
 
         // open cluster and close it directly again to detect the element sizes
-        int[] sizes = new int[clusterCapacity];
+        int[] sizes = new int[clusterCount];
         int sumSizes = 1;
         plasmaWordIndexAssortment testAssortment;
-        for (int i = 0; i < clusterCapacity; i++) {
+        for (int i = 0; i < clusterCount; i++) {
 	    testAssortment = new plasmaWordIndexAssortment(assortmentsPath, i + 1, 0, null);
-            sizes[i] = testAssortment.size() + clusterCapacity - i;
+            sizes[i] = testAssortment.size() + clusterCount - i;
             sumSizes += sizes[i];
             testAssortment.close();
             testAssortment = null;
 	}
         
 	// initialize cluster using the cluster elements size for optimal buffer size
-	for (int i = 0; i < clusterCapacity; i++) {
+	for (int i = 0; i < clusterCount; i++) {
 	    assortments[i] = new plasmaWordIndexAssortment(assortmentsPath, i + 1, (int) ((long) completeBufferKB * (long) sizes[i] / (long) sumSizes), log);
 	}
     }
 
-    public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) {
+    private plasmaWordIndexEntryContainer storeSingular(String wordHash, plasmaWordIndexEntryContainer newContainer) {
 	// this tries to store the record. If the record does not fit, or a same hash already
 	// exists and would not fit together with the new record, then the record is deleted from
 	// the assortmen(s) and returned together with the newRecord.
 	// if storage was successful, NULL is returned.
-	if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
+	if (newContainer.size() > clusterCount) return newContainer; // it will not fit
         plasmaWordIndexEntryContainer buffer;
         while ((buffer = assortments[newContainer.size() - 1].remove(wordHash)) != null) {
             newContainer.add(buffer);
-            if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
+            if (newContainer.size() > clusterCount) return newContainer; // it will not fit
         }
         // the assortment (newContainer.size() - 1) should now be empty. put it in there
         assortments[newContainer.size() - 1].store(wordHash, newContainer);
@@ -105,39 +108,50 @@ public final class plasmaWordIndexAssortmentCluster {
         return null;
     }
     
-    /*
-    public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) {
-	// this tries to store the record. If the record does not fit, or a same hash already
-	// exists and would not fit together with the new record, then the record is deleted from
-	// the assortmen(s) and returned together with the newRecord.
-	// if storage was successful, NULL is returned.
-	if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
-	plasmaWordIndexEntryContainer buffer;
-	for (int i = 0; i < clusterCapacity; i++) {
-	    buffer = assortments[i].remove(wordHash);
-	    if (buffer != null) newContainer.add(buffer);
-            if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
-	}
-        // we collected all records and the result will fit somewhere..
+    private void storeForced(String wordHash, plasmaWordIndexEntryContainer newContainer) {
+	// this stores the record and overwrites an existing record.
+        // this is safe of we can be shure that the record does not exist before.
+	if ((newContainer == null) || (newContainer.size() == 0) || (newContainer.size() > clusterCount)) return; // it will not fit
         assortments[newContainer.size() - 1].store(wordHash, newContainer);
-        // return null to show that we have stored the new Record successfully
-        return null;
     }
-    */
     
-    /*
-    public plasmaWordIndexEntryContainer removeFromOne(String wordHash, int assortment) {
-        // collect one container from a specific assortment
-        plasmaWordIndexEntryContainer container = assortments[assortment].remove(wordHash);
-	if (container == null) return new plasmaWordIndexEntryContainer(wordHash);
-        return container;
+    private void storeStretched(String wordHash, plasmaWordIndexEntryContainer newContainer) {
+	// this stores the record and stretches the storage over
+        // all the assortments that are necessary to fit in the record
+	if (newContainer.size() <= clusterCount) {
+            storeForced(wordHash, newContainer);
+            return;
+        }
+        plasmaWordIndexEntryContainer c;
+        Iterator i = newContainer.entries();
+        for (int j = clusterCount; j >= 1; j--) {
+            c = new plasmaWordIndexEntryContainer(wordHash);
+            for (int k = 0; k < j; k++) {
+                if (i.hasNext()) {
+                    c.add((plasmaWordIndexEntry) i.next(), newContainer.updated());
+                } else {
+                    storeForced(wordHash, c);
+                    return;
+                }
+            }
+            storeForced(wordHash, c);
+        }
+    }
+    
+    public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) {
+        if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
+        if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer);
+        if (newContainer == null) return null;
+        newContainer.add(removeFromAll(wordHash));
+        if (newContainer.size() > clusterCapacity) return newContainer;
+        storeStretched(wordHash, newContainer);
+        return null;
     }
-    */
     
     public plasmaWordIndexEntryContainer removeFromAll(String wordHash) {
         // collect all records from all the assortments and return them
         plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
-	for (int i = 0; i < clusterCapacity; i++) {
+	for (int i = 0; i < clusterCount; i++) {
 	    buffer = assortments[i].remove(wordHash);
 	    if (buffer != null) record.add(buffer);
 	}
@@ -146,24 +160,24 @@ public final class plasmaWordIndexAssortmentCluster {
 
     public Iterator hashConjunction(String startWordHash, boolean up) {
         HashSet iterators = new HashSet();
-        for (int i = 0; i < clusterCapacity; i++) iterators.add(assortments[i].hashes(startWordHash, up, true));
+        for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true));
         return kelondroMergeIterator.cascade(iterators, up);
     }
 
     public int sizeTotal() {
         int total = 0;
-        for (int i = 0; i < clusterCapacity; i++) total += assortments[i].size();
+        for (int i = 0; i < clusterCount; i++) total += assortments[i].size();
         return total;
     }
 
     public int[] sizes() {
-        int[] sizes = new int[clusterCapacity];
-        for (int i = 0; i < clusterCapacity; i++) sizes[i] = assortments[i].size();
+        int[] sizes = new int[clusterCount];
+        for (int i = 0; i < clusterCount; i++) sizes[i] = assortments[i].size();
         return sizes;
     }
         
     public void close() {
-        for (int i = 0; i < clusterCapacity; i++) assortments[i].close();
+        for (int i = 0; i < clusterCount; i++) assortments[i].close();
     }
 
 }
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index 25a080f82..b60e793df 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -47,6 +47,7 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.Enumeration;
 
 import de.anomic.kelondro.kelondroException;
 import de.anomic.kelondro.kelondroMScoreCluster;
@@ -54,6 +55,7 @@ import de.anomic.kelondro.kelondroMergeIterator;
 import de.anomic.kelondro.kelondroRecords;
 import de.anomic.kelondro.kelondroStack;
 import de.anomic.kelondro.kelondroArray;
+import de.anomic.kelondro.kelondroTree;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySeedDB;
 
@@ -65,7 +67,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
     private static final String oldSingletonFileName = "indexSingletons0.db";
     private static final String newSingletonFileName = "indexAssortment001.db";
     private static final String indexAssortmentClusterPath = "ACLUSTER";
-    private static final int assortmentLimit = 50;
+    private static final int assortmentCount = 50;
     private static final int ramCacheLimit = 200;
     
     
@@ -119,7 +121,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 	this.maxWords = 10000;
         this.backend = backend;
         this.log = log;
-	this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentLimit, assortmentBufferSize, log);
+	this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log);
 
         // read in dump of last session
         try {
@@ -438,13 +440,13 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
             int count = hashScore.getMaxScore();
             long time = longTime(hashDate.getScore(hash));
             if ((count > ramCacheLimit) ||
-                ((count > assortmentLimit) && (System.currentTimeMillis() - time > 10000))) {
+                ((count > assortmentCount) && (System.currentTimeMillis() - time > 10000))) {
                 // flush high-score entries
-                flushFromMem(hash, true);
+                flushFromMem(hash);
             } else {
                 // flush oldest entries
                 hash = (String) hashDate.getMinObject();
-                flushFromMem(hash, true);
+                flushFromMem(hash);
             }
         } catch (Exception e) {
             log.logError("flushFromMem: " + e.getMessage());
@@ -453,13 +455,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         flushThread.proceed();
     }
     
-    private int flushFromMem(String key, boolean reintegrate) {
+    private int flushFromMem(String key) {
         // this method flushes indexes out from the ram to the disc.
-        // at first we check the singleton database and act accordingly
-        // if we we are to flush an index, but see also an entry in the singletons, we
-        // decide upn the 'reintegrate'-Flag:
-        // true: do not flush to disc, but re-Integrate the singleton to the RAM
-        // false: flush the singleton together with container to disc
         
         plasmaWordIndexEntryContainer container = null;
         long time;
@@ -476,19 +473,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 	}
         
         // now decide where to flush that container
-        if (container.size() <= assortmentLimit) {
+        if (container.size() <= assortmentCluster.clusterCapacity) {
             // this fits into the assortments
             plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(key, container);
             if (feedback == null) {
                 return container.size();
-            } else if ((container.size() != feedback.size()) && (reintegrate)) {
-                // put assortmentRecord together with container back to ram
-                synchronized (cache) {
-                    cache.put(key, feedback);
-                    hashScore.setScore(key, feedback.size());
-                    hashDate.setScore(key, intTime(time));
-                }
-                return container.size() - feedback.size();
             } else {
                 // *** should care about another option here ***
                 return backend.addEntries(feedback, time);
@@ -522,7 +511,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 
     public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
         flushThread.pause();
-        flushFromMem(wordHash, false);
+        flushFromMem(wordHash);
         flushFromAssortmentCluster(wordHash);
         flushThread.proceed();
 	return backend.getIndex(wordHash, deleteIfEmpty);
@@ -553,7 +542,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
 
     public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
         flushThread.pause();
-        flushFromMem(wordHash, false);
+        flushFromMem(wordHash);
         flushFromAssortmentCluster(wordHash);
         int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
         flushThread.proceed();
@@ -561,6 +550,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
     }
     
     public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) {
+        // this puts the entries into the cache, not into the assortment directly
         flushThread.pause();
 	//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
         while (cache.size() >= this.maxWords) flushFromMem();
@@ -618,4 +608,42 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
         }
     }
 
+    public int migrateWords2Assortment(String wordhash) throws IOException {
+        // returns the number of entries that had been added to the assortments
+        // can be negative if some assortments have been moved to the backend
+        File db = plasmaWordIndexEntity.wordHash2path(databaseRoot, wordhash);
+        if (!(db.exists())) return 0;
+        plasmaWordIndexEntity entity = new plasmaWordIndexEntity(databaseRoot, wordhash, true);
+        int size = entity.size();
+        if (size > assortmentCluster.clusterCapacity) {
+            // this will be too big to integrate it
+            entity.close();
+            return 0;
+        } else {
+            // take out all words from the assortment to see if it fits
+            // together with the extracted assortment
+            plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash);
+            if (size + container.size() > assortmentCluster.clusterCapacity) {
+                // this will also be too big to integrate, add to entity
+                entity.addEntries(container);
+                entity.close();
+                return -container.size();
+            } else {
+                // the combined container will fit, read the container
+                Enumeration entries = entity.elements(true);
+                plasmaWordIndexEntry entry;
+                while (entries.hasMoreElements()) {
+                    entry = (plasmaWordIndexEntry) entries.nextElement();
+                    container.add(new plasmaWordIndexEntry[]{entry}, System.currentTimeMillis());
+                }
+                // we have read all elements, now delete the entity
+                entity.deleteComplete();
+                entity.close();
+                // integrate the container into the assortments; this will work
+                assortmentCluster.storeTry(wordhash, container);
+                return size;
+            }
+        }
+    }
+
 }
diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
index dad5f2c2f..9be8eeda7 100644
--- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
+++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java
@@ -226,20 +226,12 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
     public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
 	//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
 	// fetch the index cache
-        if (container.size() == 0) return 0;
+        if ((container == null) || (container.size() == 0)) return 0;
         
         // open file
         try {
             plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, container.wordHash(), false);
-            int count = 0;
-            
-            // write from vector
-            if (container != null) {
-                Iterator i = container.entries();
-                while (i.hasNext()) {
-                    if (pi.addEntry((plasmaWordIndexEntry) i.next())) count++;
-                }
-            }
+            int count = pi.addEntries(container);
             
             // close and return
             pi.close();
diff --git a/source/de/anomic/plasma/plasmaWordIndexEntity.java b/source/de/anomic/plasma/plasmaWordIndexEntity.java
index b29e955ca..0096aa2a4 100644
--- a/source/de/anomic/plasma/plasmaWordIndexEntity.java
+++ b/source/de/anomic/plasma/plasmaWordIndexEntity.java
@@ -170,6 +170,31 @@ public class plasmaWordIndexEntity {
 	}
     }
     
+    public int addEntries(plasmaWordIndexEntryContainer container) {
+	//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
+	// fetch the index cache
+        if ((container == null) || (container.size() == 0)) return 0;
+        
+        // open file
+        try {
+            int count = 0;
+            
+            // write from vector
+            if (container != null) {
+                Iterator i = container.entries();
+                while (i.hasNext()) {
+                    if (addEntry((plasmaWordIndexEntry) i.next())) count++;
+                }
+            }
+            
+            // close and return
+            return count;
+        } catch (IOException e) {
+            e.printStackTrace();
+            return 0;
+        }
+    }
+    
     public boolean deleteComplete() throws IOException {
         if (theTmpMap == null) {
             theIndex.close();
diff --git a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
index a042c0b06..1c15d35b8 100644
--- a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
+++ b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
@@ -81,6 +81,11 @@ public class plasmaWordIndexEntryContainer implements Comparable {
         return wordHash;
     }
 
+    public int add(plasmaWordIndexEntry entry, long updateTime) {
+        this.updateTime = java.lang.Math.max(this.updateTime, updateTime);
+        return (add(entry)) ? 1 : 0;
+    }
+    
     public int add(plasmaWordIndexEntry[] entries, long updateTime) {
         int c = 0;
         for (int i = 0; i < entries.length; i++) if (add(entries[i])) c++;
diff --git a/source/yacy.java b/source/yacy.java
index 410c20201..d2b52aeeb 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -98,6 +98,8 @@ import de.anomic.plasma.plasmaWordIndex;
 import de.anomic.plasma.plasmaWordIndexEntity;
 import de.anomic.plasma.plasmaWordIndexEntry;
 import de.anomic.plasma.plasmaWordIndexEntryContainer;
+import de.anomic.plasma.plasmaWordIndexClassicDB;
+import de.anomic.plasma.plasmaWordIndexCache;
 import de.anomic.server.serverCodings;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverFileUtils;
@@ -504,40 +506,6 @@ public final class yacy {
         serverLog.logSystem("GEN-WORDSTAT", "FINISHED");
     }
     
-    private static void checkMigrate(File dbroot, serverLog log, File file, plasmaWordIndex wordIndex) throws IOException {
-        long length = file.length();
-        if (length > 3000) {
-            log.logInfo("SKIPPED  " + file.toString() + ": too big, size=" + (length / 1024) + "kb");
-            return;
-        }
-        kelondroTree db = new kelondroTree(file, 0);
-        String wordhash = file.getName().substring(0, 12);
-        int size = db.size();
-        db.close();
-        if (size <= 50) {
-            plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordhash);
-            plasmaWordIndexEntity entity = new plasmaWordIndexEntity(dbroot, wordhash, true);
-            Enumeration entries = entity.elements(true);
-            plasmaWordIndexEntry entry;
-            while (entries.hasMoreElements()) {
-                entry = (plasmaWordIndexEntry) entries.nextElement();
-                container.add(new plasmaWordIndexEntry[]{entry}, System.currentTimeMillis());
-            }
-            wordIndex.addEntries(container);
-            entity.deleteComplete();
-            entity.close();
-            if (file.exists()) {
-                log.logInfo("MIGRATED " + file.toString() + ": " + size + " entries, " + (length / 1024) + "kb, delete fail at end");
-                file.delete();
-            } else {
-                log.logInfo("MIGRATED " + file.toString() + ": " + size + " entries, " + (length / 1024) + "kb");
-            }
-        } else {
-            log.logInfo("SKIPPED  " + file.toString() + ": " + size + " entries, " + (length / 1024) + "kb");
-        }
-        db.close();
-    }
-    
     public static void migrateWords(String homePath) {
         // run with "java -classpath classes yacy -migratewords"
         try {serverLog.configureLogging(new File(homePath, "yacy.logging"));} catch (Exception e) {}
@@ -545,15 +513,26 @@ public final class yacy {
         try {
             serverLog log = new serverLog("WORDMIGRATION");
             log.logInfo("STARTING MIGRATION");
-            plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, 20000, log);
+            plasmaWordIndexCache wordIndexCache = new plasmaWordIndexCache(dbroot, new plasmaWordIndexClassicDB(dbroot, log), 20000, log);
             enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
+            String wordhash;
+            File wordfile;
+            int migration;
             while (words.hasMoreElements()) try {
-                checkMigrate(dbroot, log, (File) words.nextElement(), wordIndex);
+                wordfile = (File) words.nextElement();
+                wordhash = wordfile.getName().substring(0, 12);
+                migration = wordIndexCache.migrateWords2Assortment(wordhash);
+                if (migration == 0)
+                    log.logInfo("SKIPPED  " + wordhash + ": too big");
+                else if (migration > 0)
+                    log.logInfo("MIGRATED " + wordhash + ": " + migration + " entries");
+                else
+                    log.logInfo("REVERSED " + wordhash + ": " + (-migration) + " entries");
             } catch (Exception e) {
                 e.printStackTrace();
             }
             log.logInfo("FINISHED MIGRATION JOB, WAIT FOR DUMP");
-            wordIndex.close(60);
+            wordIndexCache.close(60);
             log.logInfo("TERMINATED MIGRATION");
         } catch (IOException e) {
             e.printStackTrace();