Attempt to fix a deadlock situation in which the IODispatcher did not work.

I suspect that the dispatcher thread crashed and the queues filled up, so no indexing process was able to write data.
This fix tries to heal the problem, but I am not sure whether it helps. To get a better view of the problem, some more log output has been inserted. A sketch of the guarding pattern used by the fix follows below.
Also added a new attribute, indexer.threads, to control the default number of threads for the indexer (default is 1).
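
The defensive pattern behind the fix: every public entry point of the dispatcher first checks that the thread is alive and that its control queue still exists, and otherwise performs the work synchronously in the caller's thread, so indexing can make progress even when the dispatcher has died. A minimal, self-contained sketch of that pattern (class and method names are illustrative, not YaCy's actual API):

    import java.util.concurrent.ArrayBlockingQueue;

    // Sketch only: a dispatcher thread that drains a control queue; producers
    // fall back to doing the work themselves whenever the dispatcher is dead.
    public class GuardedDispatcher extends Thread {

        private static final Runnable POISON = () -> {};   // sentinel that ends the loop
        private volatile ArrayBlockingQueue<Runnable> controlQueue = new ArrayBlockingQueue<>(100);

        // Producers call this instead of writing to the queue directly.
        public void submit(Runnable job) {
            ArrayBlockingQueue<Runnable> q = this.controlQueue;
            if (q == null || !this.isAlive()) {
                job.run();                                 // dispatcher dead: do the work here
                return;
            }
            try {
                q.put(job);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                job.run();                                 // interrupted: still make progress
            }
        }

        @Override
        public void run() {
            try {
                Runnable job;
                while ((job = controlQueue.take()) != POISON) job.run();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } finally {
                controlQueue = null;                       // mark the dispatcher as dead
            }
        }

        public void terminate() throws InterruptedException {
            ArrayBlockingQueue<Runnable> q = this.controlQueue;
            if (q != null && this.isAlive()) q.put(POISON);
        }
    }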


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5866 6c8d7289-2bf4-0310-a012-ef5d649a1542
commit c10c257255 (parent 09987e93fd), by orbiter

@@ -633,8 +633,7 @@ javastart_priority=10
 # wordCacheMaxLow/High is the number of word indexes that shall be held in the
 # ram cache during indexing. When YaCy is shut down, this cache must be
 # flushed to disc; this may last some minutes.
-wordCacheMaxCount = 30000
-wordCacheMaxCount__pro = 100000
+wordCacheMaxCount = 100000
 
 # Specifies if yacy can be used as transparent http proxy.
 #
@@ -684,26 +683,23 @@ crawler.http.acceptEncoding=gzip
 crawler.http.acceptLanguage=en-us,en;q=0.5
 crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
 crawler.http.maxFileSize=262144
-crawler.http.maxFileSize__pro=262144
 
 # ftp crawler specific settings; size in bytes
 crawler.ftp.maxFileSize=262144
-crawler.ftp.maxFileSize__pro=262144
 
 # maximum number of crawler threads
 crawler.MaxActiveThreads = 30
 
 # maximum size of indexing queue
 indexer.slots = 40
-indexer.slots__pro = 80
+indexer.threads = 1
 
 # maximum size of stacker queue
 stacker.slots = 2000
 
 # specifies if yacy should set it's own referer if no referer URL
 # was set by the client.
-useYacyReferer = true
-useYacyReferer__pro = false
+useYacyReferer = false
 
 # allow only 443(https-port) for https-proxy?
 # if you want to tunnel other protocols, set to false

@@ -30,6 +30,7 @@ import java.util.concurrent.ArrayBlockingQueue;
 
 import de.anomic.kelondro.blob.BLOBArray;
 import de.anomic.kelondro.index.Row;
+import de.anomic.kelondro.util.Log;
 
 /**
  * this is a concurrent merger that can merge single files that are queued for merging.
@@ -63,7 +64,7 @@ public class IODispatcher <ReferenceType extends Reference> extends Thread {
     }
     
     public synchronized void terminate() {
-        if (termQueue != null && this.isAlive()) {
+        if (termQueue != null && controlQueue != null && this.isAlive()) {
             try {
                 controlQueue.put(poison);
             } catch (InterruptedException e) {
@@ -79,7 +80,7 @@ public class IODispatcher <ReferenceType extends Reference> extends Thread {
     }
     
     public synchronized void dump(ReferenceContainerCache<ReferenceType> cache, File file, ReferenceContainerArray<ReferenceType> array) {
-        if (dumpQueue == null || !this.isAlive()) {
+        if (dumpQueue == null || controlQueue == null || !this.isAlive()) {
             cache.dump(file);
         } else {
             DumpJob job = new DumpJob(cache, file, array);
@@ -94,11 +95,11 @@ public class IODispatcher <ReferenceType extends Reference> extends Thread {
     }
     
     public synchronized int queueLength() {
-        return controlQueue.size();
+        return (controlQueue == null) ? 0 : controlQueue.size();
     }
     
     public synchronized void merge(File f1, File f2, BLOBArray array, Row payloadrow, File newFile) {
-        if (mergeQueue == null || !this.isAlive()) {
+        if (mergeQueue == null || controlQueue == null || !this.isAlive()) {
             try {
                 array.mergeMount(f1, f2, factory, payloadrow, newFile);
             } catch (IOException e) {
@@ -127,21 +128,34 @@ public class IODispatcher <ReferenceType extends Reference> extends Thread {
             loop: while (controlQueue.take() != poison) {
                 // prefer dump actions to flush memory to disc
                 if (dumpQueue.size() > 0) {
-                    dumpJob = dumpQueue.take();
-                    dumpJob.dump();
+                    try {
+                        dumpJob = dumpQueue.take();
+                        dumpJob.dump();
+                    } catch (InterruptedException e) {
+                        e.printStackTrace();
+                        Log.logSevere("IODispatcher", "main run job was interrupted (1)", e);
+                    }
                     continue loop;
                 }
                 // otherwise do a merge operation
                 if (mergeQueue.size() > 0) {
-                    mergeJob = mergeQueue.take();
-                    mergeJob.merge();
+                    try {
+                        mergeJob = mergeQueue.take();
+                        mergeJob.merge();
+                    } catch (InterruptedException e) {
+                        e.printStackTrace();
+                        Log.logSevere("IODispatcher", "main run job was interrupted (2)", e);
+                    }
                     continue loop;
                 }
                 assert false; // this should never happen
             }
         } catch (InterruptedException e) {
             e.printStackTrace();
+            Log.logSevere("IODispatcher", "main run job was interrupted (3)", e);
         } finally {
+            Log.logInfo("IODispatcher", "terminating run job");
+            controlQueue = null;
             try {
                 termQueue.put(poison);
             } catch (InterruptedException e) {
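
For context, the run() loop above ends with a two-queue handshake: the finally block marks the dispatcher dead by nulling controlQueue (the condition that the new guards in terminate(), dump(), merge() and queueLength() test), and then puts the poison object on termQueue. A compact sketch of that handshake, assuming terminate() waits on termQueue for the acknowledgement (that wait is not visible in the hunks shown here and is an assumption):

    import java.util.concurrent.ArrayBlockingQueue;

    // Sketch of the two-queue shutdown handshake suggested by the hunk above
    // (field names follow the patch; everything else is illustrative).
    public class HandshakeSketch extends Thread {

        private static final Object poison = new Object();
        private volatile ArrayBlockingQueue<Object> controlQueue = new ArrayBlockingQueue<>(10);
        private final ArrayBlockingQueue<Object> termQueue = new ArrayBlockingQueue<>(1);

        @Override
        public void run() {
            try {
                while (controlQueue.take() != poison) {
                    // ... process dump/merge jobs ...
                }
            } catch (InterruptedException e) {
                e.printStackTrace();
            } finally {
                controlQueue = null;           // later callers see a dead dispatcher
                try {
                    termQueue.put(poison);     // acknowledge shutdown to terminate()
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

        public synchronized void terminate() throws InterruptedException {
            ArrayBlockingQueue<Object> q = controlQueue;
            if (q != null && this.isAlive()) {
                q.put(poison);                 // ask the loop to exit ...
                termQueue.take();              // ... and wait until it actually has
            }
        }
    }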

@@ -466,34 +466,38 @@ public final class ReferenceContainerCache<ReferenceType extends Reference> exte
         return 0;
     }
     
-    public synchronized void add(final ReferenceContainer<ReferenceType> container) {
+    public void add(final ReferenceContainer<ReferenceType> container) {
         // this puts the entries into the cache
         if (this.cache == null || container == null || container.size() == 0) return;
         // put new words into cache
         ByteArray tha = new ByteArray(container.getTermHash());
-        ReferenceContainer<ReferenceType> entries = cache.get(tha); // null pointer exception? wordhash != null! must be cache==null
-        int added = 0;
-        if (entries == null) {
-            entries = container.topLevelClone();
-            added = entries.size();
-        } else {
-            added = entries.putAllRecent(container);
-        }
-        if (added > 0) {
-            cache.put(tha, entries);
+        synchronized (this) {
+            ReferenceContainer<ReferenceType> entries = cache.get(tha); // null pointer exception? wordhash != null! must be cache==null
+            int added = 0;
+            if (entries == null) {
+                entries = container.topLevelClone();
+                added = entries.size();
+            } else {
+                added = entries.putAllRecent(container);
+            }
+            if (added > 0) {
+                cache.put(tha, entries);
+            }
+            entries = null;
+            return;
         }
-        entries = null;
-        return;
     }
     
-    public synchronized void add(final byte[] termHash, final ReferenceType newEntry) {
+    public void add(final byte[] termHash, final ReferenceType newEntry) {
         assert this.cache != null;
         
         ByteArray tha = new ByteArray(termHash);
-        ReferenceContainer<ReferenceType> container = cache.get(tha);
-        if (container == null) container = new ReferenceContainer<ReferenceType>(factory, termHash, this.payloadrow, 1);
-        container.put(newEntry);
-        cache.put(tha, container);
+        synchronized (this) {
+            ReferenceContainer<ReferenceType> container = cache.get(tha);
+            if (container == null) container = new ReferenceContainer<ReferenceType>(factory, termHash, this.payloadrow, 1);
+            container.put(newEntry);
+            cache.put(tha, container);
+        }
     }
     
     public int minMem() {
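
The change to both add() methods is a lock-narrowing refactoring: the synchronized modifier moves off the method signature, so the cheap null/size checks and the ByteArray key construction run without holding the monitor, and only the compound get-modify-put on the shared cache map is serialized. In isolation the pattern looks like this (hypothetical names, not YaCy code):

    import java.util.HashMap;
    import java.util.Map;

    // Lock-narrowing sketch: only the compound read-modify-write on the
    // shared map needs the monitor; argument checks and key building do not.
    public class CacheSketch {

        private final Map<String, Integer> cache = new HashMap<>();

        // Before: the whole method holds the lock, including validation
        // and key construction.
        public synchronized void addCoarse(String rawKey, int delta) {
            if (rawKey == null) return;
            String key = rawKey.trim();                 // lock held unnecessarily here
            cache.merge(key, delta, Integer::sum);
        }

        // After: validation and key construction run unlocked; only the
        // update of the shared map is serialized.
        public void addNarrow(String rawKey, int delta) {
            if (rawKey == null) return;                 // no lock needed
            String key = rawKey.trim();                 // no lock needed
            synchronized (this) {
                cache.merge(key, delta, Integer::sum);  // compound update, lock held
            }
        }
    }

This shortens the time each indexing thread spends blocked on the cache monitor, which matters once indexer.threads is raised above 1.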

@@ -596,11 +596,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         this.clusterhashes = this.webIndex.peers().clusterHashes(getConfig("cluster.peers.yacydomain", ""));
         
         // deploy blocking threads
+        int indexerThreads = (int) this.getConfigLong(plasmaSwitchboardConstants.INDEXER_THREADS, 1);
         indexingStorageProcessor = new serverProcessor<indexingQueueEntry>(
                 "storeDocumentIndex",
                 "This is the sequencing step of the indexing queue: no concurrency is wanted here, because the access of the indexer works better if it is not concurrent. Files are written as streams, councurrency would destroy IO performance. In this process the words are written to the RWI cache, which flushes if it is full.",
                 new String[]{"RWI/Cache/Collections"},
-                this, "storeDocumentIndex", serverProcessor.useCPU + 40, null, 1);
+                this, "storeDocumentIndex", serverProcessor.useCPU + 40, null, indexerThreads);
         indexingAnalysisProcessor = new serverProcessor<indexingQueueEntry>(
                 "webStructureAnalysis",
                 "This just stores the link structure of the document into a web structure database.",

@@ -123,6 +123,7 @@ public final class plasmaSwitchboardConstants {
     public static final String INDEXER_METHOD_JOBCOUNT = "queueSize";
     public static final String INDEXER_METHOD_FREEMEM = "deQueueFreeMem";
     public static final String INDEXER_SLOTS = "indexer.slots";
+    public static final String INDEXER_THREADS = "indexer.threads";
     // 90_cleanup
     /**
      * <p><code>public static final String <strong>CLEANUP</strong> = "90_cleanup"</code></p>
