From 8e5ce7cd514f608dd88dabeb9c3e9dd65bae6edd Mon Sep 17 00:00:00 2001
From: orbiter <mc@yacy.net>
Date: Wed, 23 Apr 2014 23:13:07 +0200
Subject: [PATCH] fixed a situation where finished crawls had not been
 detected.

---
 source/net/yacy/search/Switchboard.java | 59 ++++++++++++++-----------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 111c716d0..5fe44bcd3 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2296,46 +2296,55 @@ public final class Switchboard extends serverSwitch {
             // clean up profiles
             checkInterruption();
 
-            // if no crawl is running and processing is activated:
             // execute the (post-) processing steps for all entries that have a process tag assigned
             Fulltext fulltext = index.fulltext();
             CollectionConfiguration collection1Configuration = fulltext.getDefaultConfiguration();
             boolean allCrawlsFinished = this.crawler.allCrawlsFinished(this.crawlQueues);
             int proccount = 0;
-            if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) && MemoryControl.available() > 512L * 1024L * 1024L && Memory.load() < 2.5f) {
-                
-                // we optimize first because that is useful for postprocessing
-                ReferenceReportCache rrCache = index.getReferenceReportCache();
-                Set<String> deletionCandidates = collection1Configuration.contains(CollectionSchema.harvestkey_s.getSolrFieldName()) ?
-                        this.crawler.getFinishesProfiles(this.crawlQueues) : new HashSet<String>();
-                int cleanupByHarvestkey = deletionCandidates.size();
-                boolean postprocessing =  collection1Configuration.contains(CollectionSchema.process_sxt) && (index.connectedCitation() || fulltext.useWebgraph());
-                if (postprocessing && (cleanupByHarvestkey > 0 || allCrawlsFinished)) {
-                    if (cleanupByHarvestkey > 0) {
-                        // run postprocessing on these profiles
-                        postprocessingRunning = true;
-                        postprocessingStartTime[0] = System.currentTimeMillis();
-                        try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
-                        for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, profileHash);
-                        postprocessingStartTime[0] = 0;
-                        try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
+    
+            if (!this.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
+
+                boolean postprocessing =
+                        collection1Configuration.contains(CollectionSchema.process_sxt) &&
+                        (index.connectedCitation() || fulltext.useWebgraph()) &&
+                        MemoryControl.available() > 512L * 1024L * 1024L &&
+                        Memory.load() < 2.5f;
                         
-                        this.crawler.cleanProfiles(deletionCandidates);
-                        log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents");
-                    } else if (allCrawlsFinished) {
+                if (allCrawlsFinished) {
+                    if (postprocessing) {
                         // run postprocessing on all profiles
                         postprocessingRunning = true;
                         postprocessingStartTime[0] = System.currentTimeMillis();
                         try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
+                        ReferenceReportCache rrCache = index.getReferenceReportCache();
                         proccount += collection1Configuration.postprocessing(index, rrCache, null);
                         postprocessingStartTime[0] = 0;
                         try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
-
-                        this.crawler.cleanProfiles(this.crawler.getActiveProfiles());
-                        log.info("cleanup post-processed " + proccount + " documents");
+                        this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
                     }
+                    this.crawler.cleanProfiles(this.crawler.getActiveProfiles());
+                    log.info("cleanup post-processed " + proccount + " documents");
+                } else {
+                    Set<String> deletionCandidates = collection1Configuration.contains(CollectionSchema.harvestkey_s.getSolrFieldName()) ?
+                            this.crawler.getFinishesProfiles(this.crawlQueues) : new HashSet<String>();
+                    int cleanupByHarvestkey = deletionCandidates.size();
+                    if (cleanupByHarvestkey > 0) {
+                        if (postprocessing) {
+                            // run postprocessing on these profiles
+                            postprocessingRunning = true;
+                            postprocessingStartTime[0] = System.currentTimeMillis();
+                            try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {}
+                            ReferenceReportCache rrCache = index.getReferenceReportCache();
+                            for (String profileHash: deletionCandidates) proccount += collection1Configuration.postprocessing(index, rrCache, profileHash);
+                            postprocessingStartTime[0] = 0;
+                            try {postprocessingCount[0] = (int) fulltext.getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);} catch (IOException e) {} // should be zero but you never know
+                            this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
+                        }
+                        this.crawler.cleanProfiles(deletionCandidates);
+                        log.info("cleanup removed " + cleanupByHarvestkey + " crawl profiles, post-processed " + proccount + " documents");
+                    } 
                 }
-                this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
+                
                 postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0                
                 postprocessingRunning = false;
             }