From a9c8046c873361ad02c4c86cfede3dee09532783 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 13 Jul 2013 19:09:46 +0200 Subject: [PATCH] do a light optimization at the end of a crawl postprocessing --- source/net/yacy/search/Switchboard.java | 6 ++++-- .../yacy/search/schema/CollectionConfiguration.java | 7 ++++--- .../net/yacy/search/schema/WebgraphConfiguration.java | 11 ++++++----- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 7d6ab8372..12cde50c8 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2283,8 +2283,10 @@ public final class Switchboard extends serverSwitch { if (this.crawlQueues.coreCrawlJobSize() == 0) { if (this.crawlQueues.noticeURL.isEmpty()) this.crawlQueues.noticeURL.clear(); // flushes more caches postprocessingRunning = true; - index.fulltext().getDefaultConfiguration().postprocessing(index); - index.fulltext().getWebgraphConfiguration().postprocessing(index); + int proccount = 0; + proccount += index.fulltext().getDefaultConfiguration().postprocessing(index); + proccount += index.fulltext().getWebgraphConfiguration().postprocessing(index); + if (proccount > 0) index.fulltext().optimize(8); postprocessingRunning = false; } diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index bdffb303d..c6dab8b62 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -877,9 +877,9 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri * @param urlCitation * @return */ - public void postprocessing(final Segment segment) { - if (!this.contains(CollectionSchema.process_sxt)) return; - if (!segment.connectedCitation()) return; + public int postprocessing(final Segment segment) { + if (!this.contains(CollectionSchema.process_sxt)) return 0; + if (!segment.connectedCitation()) return 0; SolrConnector connector = segment.fulltext().getDefaultConnector(); connector.commit(true); // make sure that we have latest information that can be found ReferenceReportCache rrCache = segment.getReferenceReportCache(); @@ -967,6 +967,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri proccount_citationchange + " citation ranking changes."); } catch (InterruptedException e) { } + return proccount; } private static final class CRV { diff --git a/source/net/yacy/search/schema/WebgraphConfiguration.java b/source/net/yacy/search/schema/WebgraphConfiguration.java index e760bedcc..47d5f27a9 100644 --- a/source/net/yacy/search/schema/WebgraphConfiguration.java +++ b/source/net/yacy/search/schema/WebgraphConfiguration.java @@ -276,9 +276,9 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial } } - public void postprocessing(Segment segment) { - if (!this.contains(WebgraphSchema.process_sxt)) return; - if (!segment.connectedCitation()) return; + public int postprocessing(Segment segment) { + if (!this.contains(WebgraphSchema.process_sxt)) return 0; + if (!segment.connectedCitation()) return 0; SolrConnector connector = segment.fulltext().getWebgraphConnector(); // that means we must search for those entries. connector.commit(true); // make sure that we have latest information that can be found @@ -288,7 +288,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial SolrDocument doc; String protocol, urlstub, id; DigestURI url; - int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0; + int proccount = 0, proccount_clickdepthchange = 0; try { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { // for each to-be-processed entry work on the process tag @@ -328,9 +328,10 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial } } - ConcurrentLog.info("WebgraphConfiguration", "cleanup_processing: re-calculated " + proccount+ " new documents, " + proccount_clickdepthchange + " clickdepth values changed, " + proccount_referencechange + " reference-count values changed."); + ConcurrentLog.info("WebgraphConfiguration", "cleanup_processing: re-calculated " + proccount + " new documents, " + proccount_clickdepthchange + " clickdepth values changed."); } catch (InterruptedException e) { } + return proccount; } /**