- removed the useCell option: the IndexCell data structure is now the default index structure; old collection data is still migrated

- added some debugging output to the balancer to help find a bug
- removed now-unused classes for index collection handling (BufferedIndexCollection, IndexBuffer)
- changed some default values for process handling: jobs now require more free memory before they run, to prevent out-of-memory (OOM) errors

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5856 6c8d7289-2bf4-0310-a012-ef5d649a1542
commit 138422990a (parent 1b9e532c87), branch pull/1/head
Author: orbiter, 16 years ago

@@ -3,7 +3,7 @@ javacSource=1.5
 javacTarget=1.5
 # Release Configuration
-releaseVersion=0.75
+releaseVersion=0.76
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@@ -4,33 +4,31 @@
 # performance-settings
 # delay-times for permanent loops (milliseconds)
-# the idlesleep is the pause that an proces sleeps if the last call to the
+# the idlesleep is the pause that an process sleeps if the last call to the
 # process job was without execution of anything;
 # the busysleep is the pause after a full job execution
 # the prereq-value is a memory pre-requisite: that much bytes must
 # be available/free in the heap; othervise the loop is not executed
 # and another idlesleep is performed
 20_dhtdistribution_idlesleep=5000
 20_dhtdistribution_busysleep=2000
-20_dhtdistribution_memprereq=6291456
+20_dhtdistribution_memprereq=12582912
 50_localcrawl_idlesleep=4000
-50_localcrawl_busysleep=500
-50_localcrawl_memprereq=4194304
+50_localcrawl_busysleep=50
+50_localcrawl_memprereq=12582912
 50_localcrawl_isPaused=false
-60_remotecrawlloader_idlesleep=60000
-60_remotecrawlloader_busysleep=40000
-60_remotecrawlloader_memprereq=2097152
+60_remotecrawlloader_idlesleep=120000
+60_remotecrawlloader_busysleep=60000
+60_remotecrawlloader_memprereq=12582912
 60_remotecrawlloader_isPaused=false
-62_remotetriggeredcrawl_idlesleep=10000
-62_remotetriggeredcrawl_busysleep=1000
-62_remotetriggeredcrawl_memprereq=6291456
+62_remotetriggeredcrawl_idlesleep=60000
+62_remotetriggeredcrawl_busysleep=10000
+62_remotetriggeredcrawl_memprereq=12582912
 62_remotetriggeredcrawl_isPaused=false
 80_indexing_idlesleep=1000
-80_indexing_busysleep=100
-80_indexing_memprereq=6291456
-85_cacheflush_idlesleep=120000
-85_cacheflush_busysleep=60000
-85_cacheflush_memprereq=0
+80_indexing_busysleep=10
+80_indexing_memprereq=12582912
 82_crawlstack_idlesleep=5000
 82_crawlstack_busysleep=1
 82_crawlstack_memprereq=1048576
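
The comment block above defines the scheduler contract that all the settings follow: a process sleeps idlesleep milliseconds when its last job call did no work, busysleep milliseconds after a productive call, and skips the job (performing another idlesleep) whenever less than memprereq bytes of heap are free. The raised prerequisites are plain megabyte multiples: 12582912 bytes = 12 * 1024 * 1024 = 12 MB, up from 6291456 = 6 MB. A minimal sketch of such a loop, with hypothetical names (the actual YaCy thread scheduler is not part of this diff):

    import java.util.concurrent.Callable;

    // Illustrative only: one background process governed by the
    // idlesleep/busysleep/memprereq settings described above.
    final class BusyLoop extends Thread {
        private final long idlesleep, busysleep, memprereq;
        private final Callable<Boolean> job; // returns true if it did any work

        BusyLoop(long idlesleep, long busysleep, long memprereq, Callable<Boolean> job) {
            this.idlesleep = idlesleep;
            this.busysleep = busysleep;
            this.memprereq = memprereq;
            this.job = job;
        }

        @Override
        public void run() {
            while (!isInterrupted()) {
                try {
                    boolean busy = false;
                    // memory prerequisite: run the job only if enough heap is free,
                    // otherwise fall through to another idlesleep
                    if (Runtime.getRuntime().freeMemory() >= memprereq) {
                        busy = job.call();
                    }
                    Thread.sleep(busy ? busysleep : idlesleep);
                } catch (InterruptedException e) {
                    return; // shutdown
                } catch (Exception e) {
                    e.printStackTrace(); // a job failure must not kill the loop
                }
            }
        }
    }

Under the new values the local crawler, for example, pauses only 50 ms between productive fetches (50_localcrawl_busysleep=50) but backs off for the full 4000 ms idlesleep whenever it is idle or free heap drops below the 12 MB threshold.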

@@ -569,35 +569,37 @@ filterOutStopwordsFromTopwords=true
 # and another idlesleep is performed
 20_dhtdistribution_idlesleep=30000
 20_dhtdistribution_busysleep=10000
-20_dhtdistribution_memprereq=6291456
+20_dhtdistribution_memprereq=12582912
 30_peerping_idlesleep=120000
 30_peerping_busysleep=120000
-30_peerping_memprereq=1048576
+30_peerping_memprereq=2097152
 40_peerseedcycle_idlesleep=1800000
 40_peerseedcycle_busysleep=1200000
-40_peerseedcycle_memprereq=2097152
+40_peerseedcycle_memprereq=4194304
 50_localcrawl_idlesleep=2000
 50_localcrawl_busysleep=50
-50_localcrawl_memprereq=4194304
+50_localcrawl_memprereq=12582912
 50_localcrawl_isPaused=false
 60_remotecrawlloader_idlesleep=60000
-60_remotecrawlloader_busysleep=30000
-60_remotecrawlloader_memprereq=2097152
+60_remotecrawlloader_busysleep=10000
+60_remotecrawlloader_memprereq=12582912
 60_remotecrawlloader_isPaused=false
-62_remotetriggeredcrawl_idlesleep=30000
-62_remotetriggeredcrawl_busysleep=10000
-62_remotetriggeredcrawl_memprereq=6291456
+62_remotetriggeredcrawl_idlesleep=10000
+62_remotetriggeredcrawl_busysleep=1000
+62_remotetriggeredcrawl_memprereq=12582912
 62_remotetriggeredcrawl_isPaused=false
 80_indexing_idlesleep=1000
 80_indexing_busysleep=10
-80_indexing_memprereq=6291456
-85_cacheflush_idlesleep=60000
-85_cacheflush_busysleep=10000
-85_cacheflush_memprereq=0
+80_indexing_memprereq=12582912
 90_cleanup_idlesleep=300000
 90_cleanup_busysleep=300000
 90_cleanup_memprereq=0
+# autoReCrawl Options
+autoReCrawl_idlesleep = 3600000
+autoReCrawl_busysleep = 3600000
+autoReCrawl_memprereq = -1
 # additional attributes:
 # performanceIO is a percent-value. a value of 10 means, that 10% of the busysleep time
 # is used to flush the RAM cache, which is the major part of the IO in YaCy
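
A worked example for performanceIO: with performanceIO=10 and 60_remotecrawlloader_busysleep=10000, about 10000 * 10 / 100 = 1000 ms of every busy pause would be spent flushing the RAM cache.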
@@ -886,10 +888,6 @@ routing.deleteOldSeeds.permission__pro = true
 routing.deleteOldSeeds.time = 7
 routing.deleteOldSeeds.time__pro = 30
-# autoReCrawl Options
-autoReCrawl_idlesleep = 3600000
-autoReCrawl_busysleep = 3600000
-autoReCrawl_memprereq = -1
 # options to remember the default search engines when using the search compare features
 compare_yacy.left = YaCy
@@ -904,6 +902,3 @@ cgi.suffixes = cgi,pl
 # whether this is a version for a web browser
 browserintegration = false
-# next index data structure
-useCell = false

@@ -533,7 +533,8 @@ public class Balancer {
             // 3rd: take entry from file
             if ((result == null) && (urlFileStack.size() > 0)) {
-                final Row.Entry nextentry = (top) ? urlFileStack.top() : urlFileStack.bot();
+                Row.Entry nextentry = (top) ? urlFileStack.top() : urlFileStack.bot();
+                if (nextentry == null) nextentry = (top) ? urlFileStack.bot() : urlFileStack.top();
                 if (nextentry == null) {
                     // emergency case: this means that something with the stack organization is wrong
                     // the file appears to be broken. We kill the file.
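
The added fallback tries the opposite end of urlFileStack before the emergency branch is reached; presumably this keeps a stack whose top/bot end is damaged on only one side from being declared broken and deleted.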

@@ -95,9 +95,15 @@ public class IndexingStack {
     }
     public synchronized QueueEntry pop() throws IOException {
-        if (sbQueueStack.size() == 0) return null;
+        if (sbQueueStack.size() == 0) {
+            Log.logInfo("IndexingStack", "sbQueueStack.size() == 0");
+            return null;
+        }
         final Row.Entry b = sbQueueStack.pot();
-        if (b == null) return null;
+        if (b == null) {
+            Log.logInfo("IndexingStack", "sbQueueStack.pot() == null");
+            return null;
+        }
         return new QueueEntry(b);
     }

@@ -72,6 +72,7 @@ public class NoticedURL {
     }
     public void clear() {
+        Log.logInfo("NoticedURL", "clearing all stacks");
         coreStack.clear();
         limitStack.clear();
         remoteStack.clear();
@@ -207,6 +208,7 @@ public class NoticedURL {
     }
     public void clear(final int stackType) {
+        Log.logInfo("NoticedURL", "clearing stack " + stackType);
         switch (stackType) {
             case STACK_TYPE_CORE: coreStack.clear(); break;
             case STACK_TYPE_LIMIT: limitStack.clear(); break;

@@ -37,6 +37,7 @@ import java.util.StringTokenizer;
 import de.anomic.kelondro.index.Row;
 import de.anomic.kelondro.order.NaturalOrder;
 import de.anomic.kelondro.util.FileUtils;
+import de.anomic.kelondro.util.Log;
 import de.anomic.kelondro.util.kelondroException;
 public final class Stack extends FullRecords {
@@ -184,7 +185,10 @@ public final class Stack extends FullRecords {
     public synchronized Row.Entry pot() throws IOException {
         // return row on the bottom of the stack and remove record
         final Node n = botNode();
-        if (n == null) return null;
+        if (n == null) {
+            Log.logInfo("Stack", "botNode() == null");
+            return null;
+        }
         final Row.Entry ret = row().newEntry(n.getValueRow());
         // remove node
@@ -239,9 +243,15 @@ public final class Stack extends FullRecords {
     private Node botNode() throws IOException {
         // return node on bottom of the stack
-        if (size() == 0) return null;
+        if (size() == 0) {
+            Log.logInfo("Stack", "size() == 0");
+            return null;
+        }
         final RecordHandle h = getHandle(root);
-        if (h == null) return null;
+        if (h == null) {
+            Log.logInfo("Stack", "getHandle(root) == null");
+            return null;
+        }
         return new EcoNode(h);
     }

@@ -1,344 +0,0 @@
// BufferedIndexCollection.java
// (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 2005 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-03-13 11:34:51 +0100 (Fr, 13 Mrz 2009) $
// $LastChangedRevision: 5709 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.text;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowCollection;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.Order;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.text.Index;
import de.anomic.kelondro.text.IndexBuffer;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceContainerOrder;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.Log;
import de.anomic.server.serverProfiling;
public final class BufferedIndexCollection<ReferenceType extends Reference> extends AbstractBufferedIndex<ReferenceType> implements Index<ReferenceType>, BufferedIndex<ReferenceType> {
// environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
public static final int lowcachedivisor = 900;
public static final int maxCollectionPartition = 7; // should be 7
private final IndexBuffer<ReferenceType> buffer;
private final IndexCollection<ReferenceType> collections;
public BufferedIndexCollection (
File indexPrimaryTextLocation,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder wordOrdering,
final Row payloadrow,
final int entityCacheMaxSize,
final boolean useCommons,
final int redundancy,
Log log) throws IOException {
super(factory);
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
if (new File(textindexcache, "index.dhtin.blob").exists()) {
// migration of the both caches into one
this.buffer = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
IndexBuffer<ReferenceType> dhtInCache = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
for (ReferenceContainer<ReferenceType> c: dhtInCache) {
this.buffer.add(c);
}
FileUtils.deletedelete(new File(textindexcache, "index.dhtin.blob"));
} else {
// read in new BLOB
this.buffer = new IndexBuffer<ReferenceType>(textindexcache, factory, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
}
// create collections storage path
final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
if (!(textindexcollections.exists())) textindexcollections.mkdirs();
this.collections = new IndexCollection<ReferenceType>(
textindexcollections,
"collection",
factory,
12,
Base64Order.enhancedCoder,
maxCollectionPartition,
WordReferenceRow.urlEntryRow,
useCommons);
}
/* methods for interface Index */
public void add(final ReferenceContainer<ReferenceType> entries) {
assert (entries.row().objectsize == WordReferenceRow.urlEntryRow.objectsize);
// add the entry
buffer.add(entries);
cacheFlushControl();
}
public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
// add the entry
buffer.add(wordHash, entry);
cacheFlushControl();
}
public boolean has(final byte[] wordHash) {
if (buffer.has(wordHash)) return true;
if (collections.has(wordHash)) return true;
return false;
}
public int count(byte[] key) {
return buffer.count(key) + collections.count(key);
}
public ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) {
if (wordHash == null) {
// wrong input
return null;
}
// get from cache
ReferenceContainer<ReferenceType> container;
container = buffer.get(wordHash, urlselection);
// get from collection index
if (container == null) {
container = collections.get(wordHash, urlselection);
} else {
container.addAllUnique(collections.get(wordHash, urlselection));
}
if (container == null) return null;
// check doubles
final int beforeDouble = container.size();
container.sort();
final ArrayList<RowCollection> d = container.removeDoubles();
RowCollection set;
for (int i = 0; i < d.size(); i++) {
// for each element in the double-set, take that one that is the most recent one
set = d.get(i);
WordReferenceRow e, elm = null;
long lm = 0;
for (int j = 0; j < set.size(); j++) {
e = new WordReferenceRow(set.get(j, true));
if ((elm == null) || (e.lastModified() > lm)) {
elm = e;
lm = e.lastModified();
}
}
if(elm != null) {
container.addUnique(elm.toKelondroEntry());
}
}
if (container.size() < beforeDouble) System.out.println("*** DEBUG DOUBLECHECK - removed " + (beforeDouble - container.size()) + " index entries from word container " + container.getTermHashAsString());
return container;
}
public ReferenceContainer<ReferenceType> delete(final byte[] wordHash) {
final ReferenceContainer<ReferenceType> c = new ReferenceContainer<ReferenceType>(
factory,
wordHash,
WordReferenceRow.urlEntryRow,
buffer.count(wordHash));
c.addAllUnique(buffer.delete(wordHash));
c.addAllUnique(collections.delete(wordHash));
return c;
}
public boolean remove(final byte[] wordHash, final String urlHash) {
boolean removed = false;
removed = removed | (buffer.remove(wordHash, urlHash));
removed = removed | (collections.remove(wordHash, urlHash));
return removed;
}
public int remove(final byte[] wordHash, final Set<String> urlHashes) {
int removed = 0;
removed += buffer.remove(wordHash, urlHashes);
removed += collections.remove(wordHash, urlHashes);
return removed;
}
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, final boolean ram) throws IOException {
final CloneableIterator<ReferenceContainer<ReferenceType>> i = wordContainers(startHash, ram);
if (rot) {
return new RotateIterator<ReferenceContainer<ReferenceType>>(i, Base64Order.zero(startHash.length), buffer.size() + ((ram) ? 0 : collections.size()));
}
return i;
}
private synchronized CloneableIterator<ReferenceContainer<ReferenceType>> wordContainers(final byte[] startWordHash, final boolean ram) throws IOException {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, buffer.ordering().clone());
ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startWordHash, 0);
containerOrder.rotate(emptyContainer);
if (ram) {
return buffer.references(startWordHash, false);
}
return collections.references(startWordHash, false);
/*
return new MergeIterator<ReferenceContainer>(
indexCache.referenceIterator(startWordHash, false, true),
collections.referenceIterator(startWordHash, false, false),
containerOrder,
ReferenceContainer.containerMergeMethod,
true);
*/
}
public void clear() {
buffer.clear();
try {
collections.clear();
} catch (IOException e) {
e.printStackTrace();
}
}
public void close() {
buffer.close();
collections.close();
}
public int size() {
return java.lang.Math.max(collections.size(), buffer.size());
}
public int minMem() {
return 1024*1024 /* indexing overhead */ + buffer.minMem() + collections.minMem();
}
/*
* methods for cache management
*/
public int getBufferMaxReferences() {
return buffer.getBufferMaxReferences();
}
public long getBufferMinAge() {
return buffer.getBufferMinAge();
}
public long getBufferMaxAge() {
return buffer.getBufferMaxAge();
}
public long getBufferSizeBytes() {
return buffer.getBufferSizeBytes();
}
public void setBufferMaxWordCount(final int maxWords) {
buffer.setMaxWordCount(maxWords);
}
private void cacheFlushControl() {
// check for forced flush
int cs = getBufferSize();
if (cs > 0) {
// flush elements that are too big. This flushing depends on the fact that the flush rule
// selects the biggest elements first for flushing. If it does not for any reason, the following
// loop would not terminate.
serverProfiling.update("wordcache", Long.valueOf(cs), true);
// To ensure termination an additional counter is used
int l = 0;
while (this.buffer.size() > 0 && (l++ < 100) && (this.buffer.getBufferMaxReferences() > wCacheMaxChunk)) {
flushCacheOne(this.buffer);
}
// next flush more entries if the size exceeds the maximum size of the cache
while (this.buffer.size() > 0 &&
((this.buffer.size() > this.buffer.getMaxWordCount()) ||
(MemoryControl.available() < collections.minMem()))) {
flushCacheOne(this.buffer);
}
if (getBufferSize() != cs) serverProfiling.update("wordcache", Long.valueOf(getBufferSize()), true);
}
}
public void cleanupBuffer(int time) {
flushCacheUntil(System.currentTimeMillis() + time);
}
private synchronized void flushCacheUntil(long timeout) {
while (System.currentTimeMillis() < timeout && buffer.size() > 0) {
flushCacheOne(buffer);
}
}
private synchronized void flushCacheOne(final IndexBuffer<ReferenceType> ram) {
if (ram.size() > 0) collections.add(flushContainer(ram));
}
private ReferenceContainer<ReferenceType> flushContainer(final IndexBuffer<ReferenceType> ram) {
byte[] wordHash;
ReferenceContainer<ReferenceType> c;
wordHash = ram.maxScoreWordHash();
c = ram.get(wordHash, null);
if ((c != null) && (c.size() > wCacheMaxChunk)) {
return ram.delete(wordHash);
} else {
return ram.delete(ram.bestFlushWordHash());
}
}
public int getBackendSize() {
return collections.size();
}
public int getBufferSize() {
return buffer.size();
}
public ByteOrder ordering() {
return collections.ordering();
}
public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot) {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.buffer.ordering().clone());
return new MergeIterator<ReferenceContainer<ReferenceType>>(
this.buffer.references(startWordHash, false),
this.collections.references(startWordHash, false),
containerOrder,
ReferenceContainer.containerMergeMethod,
true);
}
}

@@ -1,348 +0,0 @@
// IndexCache.java
// (C) 2005, 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.text;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.ScoreCluster;
import de.anomic.kelondro.util.Log;
/**
* A IndexCache is a ReferenceContainerCache with an attached cache flush logic
*
*/
public final class IndexBuffer<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> implements Index<ReferenceType>, IndexReader<ReferenceType>, Iterable<ReferenceContainer<ReferenceType>> {
// class variables
private final ScoreCluster<byte[]> hashScore;
private final ScoreCluster<byte[]> hashDate;
private long initTime;
private int cacheEntityMaxCount; // the maximum number of cache slots for RWI entries
public int cacheReferenceCountLimit; // the maximum number of references to a single RWI entity
public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity
private final Log log;
private final File dumpFile;
private ReferenceContainerCache<ReferenceType> heap;
@SuppressWarnings("unchecked")
public IndexBuffer(
final File databaseRoot,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder wordOrdering,
final Row payloadrow,
final int entityCacheMaxSize,
final int wCacheReferenceCountLimitInit,
final long wCacheReferenceAgeLimitInit,
final String newHeapName,
final Log log) {
super(factory);
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.hashScore = new ScoreCluster<byte[]>(Base64Order.enhancedCoder);
this.hashDate = new ScoreCluster<byte[]>(Base64Order.enhancedCoder);
this.initTime = System.currentTimeMillis();
this.cacheEntityMaxCount = entityCacheMaxSize;
this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
this.log = log;
this.dumpFile = new File(databaseRoot, newHeapName);
this.heap = new ReferenceContainerCache(factory, payloadrow, wordOrdering);
// read in dump of last session
boolean initFailed = false;
if (dumpFile.exists()) try {
heap.initWriteModeFromBLOB(dumpFile);
} catch (IOException e) {
initFailed = true;
e.printStackTrace();
}
if (initFailed) {
log.logSevere("unable to restore cache dump");
// get empty dump
heap.initWriteMode();
} else if (dumpFile.exists()) {
// initialize scores for cache organization
for (final ReferenceContainer ic : (Iterable<ReferenceContainer>) heap.references(null, false)) {
this.hashDate.setScore(ic.getTermHash(), intTime(ic.lastWrote()));
this.hashScore.setScore(ic.getTermHash(), ic.size());
}
} else {
heap.initWriteMode();
}
}
/**
* clear the content
* @throws IOException
*/
public void clear() {
hashScore.clear();
hashDate.clear();
initTime = System.currentTimeMillis();
heap.clear();
}
public int minMem() {
// there is no specific large array that needs to be maintained
// this value is just a guess of the possible overhead
return 100 * 1024; // 100 kb
}
// cache settings
public int getBufferMaxReferences() {
if (hashScore.size() == 0) return 0;
return hashScore.getMaxScore();
}
public long getBufferMinAge() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
}
public long getBufferMaxAge() {
if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
}
public void setMaxWordCount(final int maxWords) {
this.cacheEntityMaxCount = maxWords;
}
public int getMaxWordCount() {
return this.cacheEntityMaxCount;
}
public int size() {
if (heap == null) return 0;
return heap.size();
}
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startWordHash, final boolean rot) {
// we return an iterator object that creates top-level-clones of the indexContainers
// in the cache, so that manipulations of the iterated objects do not change
// objects in the cache.
return heap.references(startWordHash, rot);
}
public synchronized byte[] maxScoreWordHash() {
if (heap == null || heap.size() == 0) return null;
try {
return hashScore.getMaxObject();
} catch (final Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
}
return null;
}
public byte[] bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
if (heap == null || heap.size() == 0) return null;
try {
//return hashScore.getMaxObject();
byte[] hash = null;
final int count = hashScore.getMaxScore();
if ((count >= cacheReferenceCountLimit) &&
((hash = hashScore.getMaxObject()) != null)) {
// we MUST flush high-score entries, because a loop deletes entries in cache until this condition fails
// in this cache we MUST NOT check wCacheMinAge
return hash;
}
final long oldestTime = longEmit(hashDate.getMinScore());
if (((System.currentTimeMillis() - oldestTime) > cacheReferenceAgeLimit) &&
((hash = hashDate.getMinObject()) != null)) {
// flush out-dated entries
return hash;
}
// cases with respect to memory situation
if (MemoryControl.free() < 100000) {
// urgent low-memory case
hash = hashScore.getMaxObject(); // flush high-score entries (saves RAM)
} else {
// not-efficient-so-far case. cleans up unnecessary cache slots
hash = hashDate.getMinObject(); // flush oldest entries
}
if (hash == null) {
final ReferenceContainer<ReferenceType> ic = heap.references(null, false).next();
if (ic != null) hash = ic.getTermHash();
}
return hash;
} catch (final Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
}
return null;
}
public synchronized ArrayList<ReferenceContainer<ReferenceType>> bestFlushContainers(final int count) {
final ArrayList<ReferenceContainer<ReferenceType>> containerList = new ArrayList<ReferenceContainer<ReferenceType>>();
byte[] hash;
ReferenceContainer<ReferenceType> container;
for (int i = 0; i < count; i++) {
hash = bestFlushWordHash();
if (hash == null) return containerList;
container = heap.delete(hash);
assert (container != null);
if (container == null) return containerList;
hashScore.deleteScore(hash);
hashDate.deleteScore(hash);
containerList.add(container);
}
return containerList;
}
private int intTime(final long longTime) {
return (int) Math.max(0, ((longTime - initTime) / 1000));
}
private long longEmit(final int intTime) {
return (((long) intTime) * (long) 1000) + initTime;
}
public boolean has(final byte[] wordHash) {
return heap.has(wordHash);
}
public int count(byte[] key) {
return this.heap.count(key);
}
public synchronized ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) {
if (wordHash == null) return null;
// retrieve container
ReferenceContainer<ReferenceType> container = heap.get(wordHash, null);
// We must not use the container from cache to store everything we find,
// as that container remains linked to in the cache and might be changed later
// while the returned container is still in use.
// create a clone from the container
if (container != null) container = container.topLevelClone();
// select the urlselection
if ((urlselection != null) && (container != null)) container.select(urlselection);
return container;
}
public synchronized ReferenceContainer<ReferenceType> delete(final byte[] wordHash) {
// returns the index that had been deleted
if (wordHash == null || heap == null) return null;
final ReferenceContainer<ReferenceType> container = heap.delete(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
}
public synchronized boolean remove(final byte[] wordHash, final String urlHash) {
final boolean removed = heap.remove(wordHash, urlHash);
if (removed) {
if (heap.has(wordHash)) {
hashScore.decScore(wordHash);
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
} else {
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
}
return true;
}
return false;
}
public synchronized int remove(final byte[] wordHash, final Set<String> urlHashes) {
if (urlHashes.size() == 0) return 0;
final int c = heap.remove(wordHash, urlHashes);
if (c > 0) {
// removal successful
if (heap.has(wordHash)) {
hashScore.addScore(wordHash, -c);
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
} else {
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
}
return c;
}
return 0;
}
public synchronized void add(final ReferenceContainer<ReferenceType> container) {
if (container == null || container.size() == 0 || heap == null) return;
// put new words into cache
heap.add(container);
hashScore.setScore(container.getTermHash(), heap.count(container.getTermHash()));
hashDate.setScore(container.getTermHash(), intTime(System.currentTimeMillis()));
}
public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
if (entry == null || heap == null) return;
// put new words into cache
heap.add(wordHash, entry);
hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
}
public synchronized void close() {
heap.dump(this.dumpFile);
heap = null;
hashScore.clear();
hashDate.clear();
}
public Iterator<ReferenceContainer<ReferenceType>> iterator() {
return references(null, false);
}
public ByteOrder ordering() {
return heap.ordering();
}
public synchronized long getBufferSizeBytes() {
// calculate the real size in bytes of the index cache
long cacheBytes = 0;
final long entryBytes = WordReferenceRow.urlEntryRow.objectsize;
final Iterator<ReferenceContainer<ReferenceType>> it = references(null, false);
while (it.hasNext()) cacheBytes += it.next().size() * entryBytes;
return cacheBytes;
}
}

@@ -60,7 +60,7 @@ import de.anomic.kelondro.util.kelondroOutOfLimitsException;
 import de.anomic.kelondro.util.Log;
 import de.anomic.yacy.yacyURL;
-public class IndexCollection<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> implements Index<ReferenceType> {
+public class IndexCollection<ReferenceType extends Reference> extends AbstractIndex<ReferenceType> {
     private static final int loadfactor = 4;
     private static final int serialNumber = 0;

@@ -107,7 +107,7 @@ public final class IndexCollectionMigration<ReferenceType extends Reference> ext
                 factory,
                 12,
                 Base64Order.enhancedCoder,
-                BufferedIndexCollection.maxCollectionPartition,
+                7,
                 WordReferenceRow.urlEntryRow,
                 false);
         if (this.collections.size() == 0) {

@@ -321,7 +321,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         final boolean useCommons = getConfigBool("index.storeCommons", false);
         final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
         final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
-        final boolean useCell = sb.getConfigBool("useCell", false);
         try {
             webIndex = new plasmaWordIndex(
                     networkName,
@@ -331,8 +330,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
                     wordCacheMaxCount,
                     useCommons,
                     redundancy,
-                    paritionExponent,
-                    useCell);
+                    paritionExponent);
         } catch (IOException e1) {
             e1.printStackTrace();
             webIndex = null;
@@ -800,7 +798,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         final boolean useCommons = getConfigBool("index.storeCommons", false);
         final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
         final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
-        final boolean useCell = sb.getConfigBool("useCell", false);
         try {
             this.webIndex = new plasmaWordIndex(
                     getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""),
@@ -810,8 +807,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
                     wordCacheMaxCount,
                     useCommons,
                     redundancy,
-                    paritionExponent,
-                    useCell);
+                    paritionExponent);
         } catch (IOException e) {
             e.printStackTrace();
             this.webIndex = null;

@@ -45,7 +45,6 @@ import de.anomic.kelondro.blob.BLOBArray;
 import de.anomic.kelondro.order.Base64Order;
 import de.anomic.kelondro.order.ByteOrder;
 import de.anomic.kelondro.text.BufferedIndex;
-import de.anomic.kelondro.text.BufferedIndexCollection;
 import de.anomic.kelondro.text.IndexCell;
 import de.anomic.kelondro.text.IndexCollectionMigration;
 import de.anomic.kelondro.text.ReferenceContainer;
@@ -123,8 +122,7 @@ public final class plasmaWordIndex {
             final int entityCacheMaxSize,
             final boolean useCommons,
             final int redundancy,
-            final int partitionExponent,
-            final boolean useCell) throws IOException {
+            final int partitionExponent) throws IOException {
         log.logInfo("Initializing Word Index for the network '" + networkName + "', word hash cache size is " + Word.hashCacheSize + ".");
@@ -154,10 +152,9 @@
         // check if the peer has migrated the index
         if (new File(indexPrimaryTextLocation, "RICOLLECTION").exists()) {
-            this.merger = (useCell) ? new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1) : null;
+            this.merger = new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1);
             if (this.merger != null) this.merger.start();
-            this.index = (useCell) ?
-                    new IndexCollectionMigration<WordReference>(
+            this.index = new IndexCollectionMigration<WordReference>(
                     indexPrimaryTextLocation,
                     wordReferenceFactory,
                     wordOrder,
@@ -166,16 +163,6 @@
                     targetFileSize,
                     maxFileSize,
                     this.merger,
-                    log)
-                    :
-                    new BufferedIndexCollection<WordReference>(
-                            indexPrimaryTextLocation,
-                            wordReferenceFactory,
-                            wordOrder,
-                            WordReferenceRow.urlEntryRow,
-                            entityCacheMaxSize,
-                            useCommons,
-                            redundancy,
-                            log);
+                    log);
         } else {
             this.merger = new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1);

@@ -328,6 +328,7 @@ public class Dispatcher {
      * This method returns true if a container was dequeued, false if not
      */
     public boolean dequeueContainer() {
+        if (transmissionCloud == null) return false;
         if (this.indexingTransmissionProcessor.queueSize() > indexingTransmissionProcessor.concurrency()) return false;
         byte[] maxtarget = null;
         int maxsize = -1;

@@ -675,7 +675,7 @@ public final class yacy {
         final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
         if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
-        final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0, false);
+        final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
         final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false);
         long urlCounter = 0, wordCounter = 0;
@@ -866,7 +866,7 @@ public final class yacy {
         try {
             Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
             if (resource.equals("all")) {
-                WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0, false);
+                WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
                 indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false);
             }
             int counter = 0;
