- removed useCell option: the indexCell data structure is now the default index structure; old collection data is still migrated
- added some debugging output to balancer to find a bug
- removed unused classes for index collection handling
- changed some default values for the process handling: more memory needed to prevent OOM

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5856 6c8d7289-2bf4-0310-a012-ef5d649a1542

pull/1/head
parent 1b9e532c87
commit 138422990a
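The first deleted file below contains the migration pattern the commit message refers to: every container from the retired store is copied into the surviving one, and the old dump file is then removed. A minimal standalone sketch of that pattern, using hypothetical Store and Container types in place of the YaCy classes:

import java.io.File;
import java.io.IOException;

// Hypothetical stand-ins for IndexBuffer / ReferenceContainer.
interface Container {}
interface Store extends Iterable<Container> {
    void add(Container c) throws IOException;
    void close();
}

final class Migration {
    // Copy every container from the old store into the new one, then remove the
    // old dump file, as the constructor below does with index.dhtin.blob.
    static void migrate(final Store oldStore, final Store newStore, final File oldDump) throws IOException {
        for (final Container c : oldStore) newStore.add(c);
        oldStore.close();
        if (!oldDump.delete()) oldDump.deleteOnExit();
    }
}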
@@ -1,344 +0,0 @@
// BufferedIndexCollection.java
// (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-03-13 11:34:51 +0100 (Fr, 13 Mrz 2009) $
// $LastChangedRevision: 5709 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package de.anomic.kelondro.text;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;

import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowCollection;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.Order;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.text.Index;
import de.anomic.kelondro.text.IndexBuffer;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceContainerOrder;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.Log;
import de.anomic.server.serverProfiling;

public final class BufferedIndexCollection<ReferenceType extends Reference> extends AbstractBufferedIndex<ReferenceType> implements Index<ReferenceType>, BufferedIndex<ReferenceType> {

    // environment constants
    public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
    public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
    public static final int lowcachedivisor = 900;
    public static final int maxCollectionPartition = 7; // should be 7

    private final IndexBuffer<ReferenceType> buffer;
    private final IndexCollection<ReferenceType> collections;

    public BufferedIndexCollection (
            File indexPrimaryTextLocation,
            final ReferenceFactory<ReferenceType> factory,
            final ByteOrder wordOrdering,
            final Row payloadrow,
            final int entityCacheMaxSize,
            final boolean useCommons,
            final int redundancy,
            Log log) throws IOException {
        super(factory);

        final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
        if (!(textindexcache.exists())) textindexcache.mkdirs();
        if (new File(textindexcache, "index.dhtin.blob").exists()) {
            // migration of the both caches into one
            this.buffer = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
            IndexBuffer<ReferenceType> dhtInCache = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
            for (ReferenceContainer<ReferenceType> c: dhtInCache) {
                this.buffer.add(c);
            }
            FileUtils.deletedelete(new File(textindexcache, "index.dhtin.blob"));
        } else {
            // read in new BLOB
            this.buffer = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
        }

        // create collections storage path
        final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
        if (!(textindexcollections.exists())) textindexcollections.mkdirs();
        this.collections = new IndexCollection<ReferenceType>(
                textindexcollections,
                "collection",
                factory,
                12,
                Base64Order.enhancedCoder,
                maxCollectionPartition,
                WordReferenceRow.urlEntryRow,
                useCommons);
    }

    /* methods for interface Index */

    public void add(final ReferenceContainer<ReferenceType> entries) {
        assert (entries.row().objectsize == WordReferenceRow.urlEntryRow.objectsize);

        // add the entry
        buffer.add(entries);
        cacheFlushControl();
    }

    public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
        // add the entry
        buffer.add(wordHash, entry);
        cacheFlushControl();
    }

    public boolean has(final byte[] wordHash) {
        if (buffer.has(wordHash)) return true;
        if (collections.has(wordHash)) return true;
        return false;
    }

    public int count(byte[] key) {
        return buffer.count(key) + collections.count(key);
    }

    public ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) {
        if (wordHash == null) {
            // wrong input
            return null;
        }

        // get from cache
        ReferenceContainer<ReferenceType> container;
        container = buffer.get(wordHash, urlselection);

        // get from collection index
        if (container == null) {
            container = collections.get(wordHash, urlselection);
        } else {
            container.addAllUnique(collections.get(wordHash, urlselection));
        }

        if (container == null) return null;

        // check doubles
        final int beforeDouble = container.size();
        container.sort();
        final ArrayList<RowCollection> d = container.removeDoubles();
        RowCollection set;
        for (int i = 0; i < d.size(); i++) {
            // for each element in the double-set, take that one that is the most recent one
            set = d.get(i);
            WordReferenceRow e, elm = null;
            long lm = 0;
            for (int j = 0; j < set.size(); j++) {
                e = new WordReferenceRow(set.get(j, true));
                if ((elm == null) || (e.lastModified() > lm)) {
                    elm = e;
                    lm = e.lastModified();
                }
            }
            if (elm != null) {
                container.addUnique(elm.toKelondroEntry());
            }
        }
        if (container.size() < beforeDouble) System.out.println("*** DEBUG DOUBLECHECK - removed " + (beforeDouble - container.size()) + " index entries from word container " + container.getTermHashAsString());

        return container;
    }

    public ReferenceContainer<ReferenceType> delete(final byte[] wordHash) {
        final ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(
                factory,
                wordHash,
                WordReferenceRow.urlEntryRow,
                buffer.count(wordHash));
        c.addAllUnique(buffer.delete(wordHash));
        c.addAllUnique(collections.delete(wordHash));
        return c;
    }

    public boolean remove(final byte[] wordHash, final String urlHash) {
        boolean removed = false;
        removed = removed | (buffer.remove(wordHash, urlHash));
        removed = removed | (collections.remove(wordHash, urlHash));
        return removed;
    }

    public int remove(final byte[] wordHash, final Set<String> urlHashes) {
        int removed = 0;
        removed += buffer.remove(wordHash, urlHashes);
        removed += collections.remove(wordHash, urlHashes);
        return removed;
    }

    public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, final boolean ram) throws IOException {
        final CloneableIterator<ReferenceContainer<ReferenceType>> i = wordContainers(startHash, ram);
        if (rot) {
            return new RotateIterator<ReferenceContainer<ReferenceType>>(i, Base64Order.zero(startHash.length), buffer.size() + ((ram) ? 0 : collections.size()));
        }
        return i;
    }

    private synchronized CloneableIterator<ReferenceContainer<ReferenceType>> wordContainers(final byte[] startWordHash, final boolean ram) throws IOException {
        final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, buffer.ordering().clone());
        ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startWordHash, 0);
        containerOrder.rotate(emptyContainer);
        if (ram) {
            return buffer.references(startWordHash, false);
        }
        return collections.references(startWordHash, false);
        /*
        return new MergeIterator<ReferenceContainer>(
                indexCache.referenceIterator(startWordHash, false, true),
                collections.referenceIterator(startWordHash, false, false),
                containerOrder,
                ReferenceContainer.containerMergeMethod,
                true);
        */
    }

    public void clear() {
        buffer.clear();
        try {
            collections.clear();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void close() {
        buffer.close();
        collections.close();
    }

    public int size() {
        return java.lang.Math.max(collections.size(), buffer.size());
    }

    public int minMem() {
        return 1024*1024 /* indexing overhead */ + buffer.minMem() + collections.minMem();
    }

    /*
     * methods for cache management
     */

    public int getBufferMaxReferences() {
        return buffer.getBufferMaxReferences();
    }

    public long getBufferMinAge() {
        return buffer.getBufferMinAge();
    }

    public long getBufferMaxAge() {
        return buffer.getBufferMaxAge();
    }

    public long getBufferSizeBytes() {
        return buffer.getBufferSizeBytes();
    }

    public void setBufferMaxWordCount(final int maxWords) {
        buffer.setMaxWordCount(maxWords);
    }

    private void cacheFlushControl() {
        // check for forced flush
        int cs = getBufferSize();
        if (cs > 0) {
            // flush elements that are too big. This flushing depends on the fact that the flush rule
            // selects the biggest elements first for flushing. If it does not for any reason, the following
            // loop would not terminate.
            serverProfiling.update("wordcache", Long.valueOf(cs), true);
            // To ensure termination an additional counter is used
            int l = 0;
            while (this.buffer.size() > 0 && (l++ < 100) && (this.buffer.getBufferMaxReferences() > wCacheMaxChunk)) {
                flushCacheOne(this.buffer);
            }
            // next flush more entries if the size exceeds the maximum size of the cache
            while (this.buffer.size() > 0 &&
                   ((this.buffer.size() > this.buffer.getMaxWordCount()) ||
                    (MemoryControl.available() < collections.minMem()))) {
                flushCacheOne(this.buffer);
            }
            if (getBufferSize() != cs) serverProfiling.update("wordcache", Long.valueOf(getBufferSize()), true);
        }
    }

    public void cleanupBuffer(int time) {
        flushCacheUntil(System.currentTimeMillis() + time);
    }

    private synchronized void flushCacheUntil(long timeout) {
        while (System.currentTimeMillis() < timeout && buffer.size() > 0) {
            flushCacheOne(buffer);
        }
    }

    private synchronized void flushCacheOne(final IndexBuffer<ReferenceType> ram) {
        if (ram.size() > 0) collections.add(flushContainer(ram));
    }

    private ReferenceContainer<ReferenceType> flushContainer(final IndexBuffer<ReferenceType> ram) {
        byte[] wordHash;
        ReferenceContainer<ReferenceType> c;
        wordHash = ram.maxScoreWordHash();
        c = ram.get(wordHash, null);
        if ((c != null) && (c.size() > wCacheMaxChunk)) {
            return ram.delete(wordHash);
        } else {
            return ram.delete(ram.bestFlushWordHash());
        }
    }

    public int getBackendSize() {
        return collections.size();
    }

    public int getBufferSize() {
        return buffer.size();
    }

    public ByteOrder ordering() {
        return collections.ordering();
    }

    public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot) {
        final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.buffer.ordering().clone());
        return new MergeIterator<ReferenceContainer<ReferenceType>>(
                this.buffer.references(startWordHash, false),
                this.collections.references(startWordHash, false),
                containerOrder,
                ReferenceContainer.containerMergeMethod,
                true);
    }

}
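The get() method deleted above merges hits from the RAM buffer with hits from the collection backend and resolves duplicate URL entries by keeping the most recently modified reference. A compact standalone sketch of that merge-and-dedupe rule, with a hypothetical Entry type standing in for WordReferenceRow:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class MergeDedupe {

    // Hypothetical stand-in for WordReferenceRow: a URL hash plus its last-modified time.
    public static final class Entry {
        final String urlHash;
        final long lastModified;
        Entry(final String urlHash, final long lastModified) {
            this.urlHash = urlHash;
            this.lastModified = lastModified;
        }
    }

    // Merge hits from the RAM buffer and the collection backend; when the same
    // urlHash occurs in both, keep the most recently modified entry, mirroring
    // the "take that one that is the most recent one" rule in get() above.
    public static List<Entry> merge(final List<Entry> buffer, final List<Entry> backend) {
        final Map<String, Entry> byUrl = new HashMap<String, Entry>();
        for (final List<Entry> source : List.of(buffer, backend)) {
            for (final Entry e : source) {
                final Entry old = byUrl.get(e.urlHash);
                if (old == null || e.lastModified > old.lastModified) byUrl.put(e.urlHash, e);
            }
        }
        return new ArrayList<Entry>(byUrl.values());
    }
}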
@@ -1,348 +0,0 @@
// IndexCache.java
// (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package de.anomic.kelondro.text;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;

import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.ScoreCluster;
import de.anomic.kelondro.util.Log;

/**
 * A IndexCache is a ReferenceContainerCache with an attached cache flush logic
 *
 */
public final class IndexBuffer<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> implements Index<ReferenceType>, IndexReader<ReferenceType>, Iterable<ReferenceContainer<ReferenceType>> {

    // class variables
    private final ScoreCluster<byte[]> hashScore;
    private final ScoreCluster<byte[]> hashDate;
    private long initTime;
    private int cacheEntityMaxCount; // the maximum number of cache slots for RWI entries
    public int cacheReferenceCountLimit; // the maximum number of references to a single RWI entity
    public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity
    private final Log log;
    private final File dumpFile;
    private ReferenceContainerCache<ReferenceType> heap;

    @SuppressWarnings("unchecked")
    public IndexBuffer(
            final File databaseRoot,
            final ReferenceFactory<ReferenceType> factory,
            final ByteOrder wordOrdering,
            final Row payloadrow,
            final int entityCacheMaxSize,
            final int wCacheReferenceCountLimitInit,
            final long wCacheReferenceAgeLimitInit,
            final String newHeapName,
            final Log log) {
        super(factory);

        // creates a new index cache
        // the cache has a back-end where indexes that do not fit in the cache are flushed
        this.hashScore = new ScoreCluster<byte[]>(Base64Order.enhancedCoder);
        this.hashDate = new ScoreCluster<byte[]>(Base64Order.enhancedCoder);
        this.initTime = System.currentTimeMillis();
        this.cacheEntityMaxCount = entityCacheMaxSize;
        this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
        this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
        this.log = log;
        this.dumpFile = new File(databaseRoot, newHeapName);
        this.heap = new ReferenceContainerCache(factory, payloadrow, wordOrdering);

        // read in dump of last session
        boolean initFailed = false;
        if (dumpFile.exists()) try {
            heap.initWriteModeFromBLOB(dumpFile);
        } catch (IOException e) {
            initFailed = true;
            e.printStackTrace();
        }
        if (initFailed) {
            log.logSevere("unable to restore cache dump");
            // get empty dump
            heap.initWriteMode();
        } else if (dumpFile.exists()) {
            // initialize scores for cache organization
            for (final ReferenceContainer ic : (Iterable<ReferenceContainer>) heap.references(null, false)) {
                this.hashDate.setScore(ic.getTermHash(), intTime(ic.lastWrote()));
                this.hashScore.setScore(ic.getTermHash(), ic.size());
            }
        } else {
            heap.initWriteMode();
        }
    }

    /**
     * clear the content
     * @throws IOException
     */
    public void clear() {
        hashScore.clear();
        hashDate.clear();
        initTime = System.currentTimeMillis();
        heap.clear();
    }

    public int minMem() {
        // there is no specific large array that needs to be maintained
        // this value is just a guess of the possible overhead
        return 100 * 1024; // 100 kb
    }

    // cache settings
    public int getBufferMaxReferences() {
        if (hashScore.size() == 0) return 0;
        return hashScore.getMaxScore();
    }

    public long getBufferMinAge() {
        if (hashDate.size() == 0) return 0;
        return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
    }

    public long getBufferMaxAge() {
        if (hashDate.size() == 0) return 0;
        return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
    }

    public void setMaxWordCount(final int maxWords) {
        this.cacheEntityMaxCount = maxWords;
    }

    public int getMaxWordCount() {
        return this.cacheEntityMaxCount;
    }

    public int size() {
        if (heap == null) return 0;
        return heap.size();
    }

    public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startWordHash, final boolean rot) {
        // we return an iterator object that creates top-level-clones of the indexContainers
        // in the cache, so that manipulations of the iterated objects do not change
        // objects in the cache.
        return heap.references(startWordHash, rot);
    }

    public synchronized byte[] maxScoreWordHash() {
        if (heap == null || heap.size() == 0) return null;
        try {
            return hashScore.getMaxObject();
        } catch (final Exception e) {
            log.logSevere("flushFromMem: " + e.getMessage(), e);
        }
        return null;
    }

    public byte[] bestFlushWordHash() {
        // select appropriate hash
        // we have 2 different methods to find a good hash:
        // - the oldest entry in the cache
        // - the entry with maximum count
        if (heap == null || heap.size() == 0) return null;
        try {
            //return hashScore.getMaxObject();
            byte[] hash = null;
            final int count = hashScore.getMaxScore();
            if ((count >= cacheReferenceCountLimit) &&
                ((hash = hashScore.getMaxObject()) != null)) {
                // we MUST flush high-score entries, because a loop deletes entries in cache until this condition fails
                // in this cache we MUST NOT check wCacheMinAge
                return hash;
            }
            final long oldestTime = longEmit(hashDate.getMinScore());
            if (((System.currentTimeMillis() - oldestTime) > cacheReferenceAgeLimit) &&
                ((hash = hashDate.getMinObject()) != null)) {
                // flush out-dated entries
                return hash;
            }
            // cases with respect to memory situation
            if (MemoryControl.free() < 100000) {
                // urgent low-memory case
                hash = hashScore.getMaxObject(); // flush high-score entries (saves RAM)
            } else {
                // not-efficient-so-far case. cleans up unnecessary cache slots
                hash = hashDate.getMinObject(); // flush oldest entries
            }
            if (hash == null) {
                final ReferenceContainer<ReferenceType> ic = heap.references(null, false).next();
                if (ic != null) hash = ic.getTermHash();
            }
            return hash;

        } catch (final Exception e) {
            log.logSevere("flushFromMem: " + e.getMessage(), e);
        }
        return null;
    }

    public synchronized ArrayList<ReferenceContainer<ReferenceType>> bestFlushContainers(final int count) {
        final ArrayList<ReferenceContainer<ReferenceType>> containerList = new ArrayList<ReferenceContainer<ReferenceType>>();
        byte[] hash;
        ReferenceContainer<ReferenceType> container;
        for (int i = 0; i < count; i++) {
            hash = bestFlushWordHash();
            if (hash == null) return containerList;
            container = heap.delete(hash);
            assert (container != null);
            if (container == null) return containerList;
            hashScore.deleteScore(hash);
            hashDate.deleteScore(hash);
            containerList.add(container);
        }
        return containerList;
    }

    private int intTime(final long longTime) {
        return (int) Math.max(0, ((longTime - initTime) / 1000));
    }

    private long longEmit(final int intTime) {
        return (((long) intTime) * (long) 1000) + initTime;
    }

    public boolean has(final byte[] wordHash) {
        return heap.has(wordHash);
    }

    public int count(byte[] key) {
        return this.heap.count(key);
    }

    public synchronized ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) {
        if (wordHash == null) return null;

        // retrieve container
        ReferenceContainer<ReferenceType> container = heap.get(wordHash, null);

        // We must not use the container from cache to store everything we find,
        // as that container remains linked to in the cache and might be changed later
        // while the returned container is still in use.
        // create a clone from the container
        if (container != null) container = container.topLevelClone();

        // select the urlselection
        if ((urlselection != null) && (container != null)) container.select(urlselection);

        return container;
    }

    public synchronized ReferenceContainer<ReferenceType> delete(final byte[] wordHash) {
        // returns the index that had been deleted
        if (wordHash == null || heap == null) return null;
        final ReferenceContainer<ReferenceType> container = heap.delete(wordHash);
        hashScore.deleteScore(wordHash);
        hashDate.deleteScore(wordHash);
        return container;
    }

    public synchronized boolean remove(final byte[] wordHash, final String urlHash) {
        final boolean removed = heap.remove(wordHash, urlHash);
        if (removed) {
            if (heap.has(wordHash)) {
                hashScore.decScore(wordHash);
                hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
            } else {
                hashScore.deleteScore(wordHash);
                hashDate.deleteScore(wordHash);
            }
            return true;
        }
        return false;
    }

    public synchronized int remove(final byte[] wordHash, final Set<String> urlHashes) {
        if (urlHashes.size() == 0) return 0;
        final int c = heap.remove(wordHash, urlHashes);
        if (c > 0) {
            // removal successful
            if (heap.has(wordHash)) {
                hashScore.addScore(wordHash, -c);
                hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
            } else {
                hashScore.deleteScore(wordHash);
                hashDate.deleteScore(wordHash);
            }
            return c;
        }
        return 0;
    }

    public synchronized void add(final ReferenceContainer<ReferenceType> container) {
        if (container == null || container.size() == 0 || heap == null) return;

        // put new words into cache
        heap.add(container);
        hashScore.setScore(container.getTermHash(), heap.count(container.getTermHash()));
        hashDate.setScore(container.getTermHash(), intTime(System.currentTimeMillis()));
    }

    public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
        if (entry == null || heap == null) return;

        // put new words into cache
        heap.add(wordHash, entry);
        hashScore.incScore(wordHash);
        hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
    }

    public synchronized void close() {
        heap.dump(this.dumpFile);
        heap = null;
        hashScore.clear();
        hashDate.clear();
    }

    public Iterator<ReferenceContainer<ReferenceType>> iterator() {
        return references(null, false);
    }

    public ByteOrder ordering() {
        return heap.ordering();
    }

    public synchronized long getBufferSizeBytes() {
        // calculate the real size in bytes of the index cache
        long cacheBytes = 0;
        final long entryBytes = WordReferenceRow.urlEntryRow.objectsize;
        final Iterator<ReferenceContainer<ReferenceType>> it = references(null, false);
        while (it.hasNext()) cacheBytes += it.next().size() * entryBytes;
        return cacheBytes;
    }

}
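bestFlushWordHash() above encodes the buffer's flush priority: over-full entries go first, then entries older than the age limit, then either the biggest entry (under memory pressure, since it frees the most RAM) or the oldest one. A self-contained sketch of that selection order, with hypothetical map-based bookkeeping standing in for the two ScoreCluster fields:

import java.util.Map;

public final class FlushPolicy {

    static final int  referenceCountLimit = 800;              // cf. wCacheMaxChunk / cacheReferenceCountLimit
    static final long referenceAgeLimitMs = 1000L * 60 * 30;  // cf. wCacheMaxAge / cacheReferenceAgeLimit
    static final long lowMemoryBytes = 100000L;               // cf. the MemoryControl.free() threshold

    // counts: word hash -> reference count; lastWrite: word hash -> last write time (ms).
    // Both maps are assumed to hold the same key set, like hashScore and hashDate above.
    static String pickFlushCandidate(final Map<String, Integer> counts,
                                     final Map<String, Long> lastWrite,
                                     final long freeMemory) {
        if (counts.isEmpty()) return null;
        final String biggest = counts.entrySet().stream()
                .max(Map.Entry.comparingByValue()).get().getKey();
        // 1. an over-full entry must always be flushed first
        if (counts.get(biggest) >= referenceCountLimit) return biggest;
        // 2. then anything that has not been written to for too long
        final Map.Entry<String, Long> oldest = lastWrite.entrySet().stream()
                .min(Map.Entry.comparingByValue()).get();
        if (System.currentTimeMillis() - oldest.getValue() > referenceAgeLimitMs) return oldest.getKey();
        // 3. otherwise choose by memory pressure: the biggest entry frees the
        //    most RAM, the oldest entry cleans up a stale cache slot
        return (freeMemory < lowMemoryBytes) ? biggest : oldest.getKey();
    }
}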