From 19a051bec858cd9a891d58be5fb3aeb3127c3391 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 16 Nov 2013 18:23:14 +0100 Subject: [PATCH] more monitoring for postprocessing and enhanced layout in Crawler monitor page --- htroot/Crawler_p.html | 35 +++++++++++-------- htroot/Crawler_p.java | 25 +++++++++---- htroot/api/status_p.java | 27 +++++++------- htroot/api/status_p.xml | 16 ++++----- htroot/js/Crawler.js | 13 +++++-- .../net/yacy/document/parser/pdfParser.java | 2 +- source/net/yacy/search/Switchboard.java | 4 +-- 7 files changed, 72 insertions(+), 50 deletions(-) diff --git a/htroot/Crawler_p.html b/htroot/Crawler_p.html index 2a2184714..ab3c37058 100644 --- a/htroot/Crawler_p.html +++ b/htroot/Crawler_p.html @@ -21,7 +21,7 @@ #%env/templates/submenuCrawlMonitor.template%#

Crawler

-
+
Queues @@ -75,7 +75,7 @@
 #[queuemessage]#
-
+
Index Size @@ -85,12 +85,12 @@ - + - + @@ -107,17 +107,17 @@
Seg-
ments
Documents
solr search api
Documents
solr search api
#[urlpublictextSize]# #[urlpublictextSegmentCount]#
Webgraph Edges
solr search api
Webgraph Edges
solr search api
#[webgraphSize]# #[webgraphSegmentCount]#
-
+
Progress
- + - + - + + + + + + + + + + + @@ -141,13 +153,6 @@ - - - - - - -
Indicator
 
Indicator
 
Level
 
SpeedSpeed / PPM
(Pages Per Minute)
PPM @@ -125,12 +125,24 @@
PPM (Pages Per Minute)Crawler PPM    
Postprocessing Progress      + +
0:0      
Traffic (Crawler)     MB     
Postprocessing            
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 03558f25e..01fc61757 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -55,6 +55,8 @@ import net.yacy.peers.NewsPool; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.index.Fulltext; +import net.yacy.search.index.Segment; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -69,12 +71,23 @@ public class Crawler_p { // inital values for AJAX Elements (without JavaScript) final serverObjects prop = new serverObjects(); prop.put("rejected", 0); - prop.put("urlpublictextSize", 0); - prop.put("urlpublictextSegmentCount", 0); - prop.put("webgraphSize", 0); - prop.put("webgraphSegmentCount", 0); - prop.put("rwipublictextSize", 0); - prop.put("rwipublictextSegmentCount", 0); + + Segment segment = sb.index; + Fulltext fulltext = segment.fulltext(); + String localSolr = "/solr/select?core=collection1&q=*:*&start=0&rows=3"; + String remoteSolr = env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, localSolr); + if (!remoteSolr.endsWith("/")) remoteSolr = remoteSolr + "/"; + prop.put("urlpublictextSolrURL", fulltext.connectedLocalSolr() ? localSolr : remoteSolr + "collection1/select?&q=*:*&start=0&rows=3"); + prop.putNum("urlpublictextSize", fulltext.collectionSize()); + prop.putNum("urlpublictextSegmentCount", fulltext.getDefaultConnector().getSegmentCount()); + prop.put("webgraphSolrURL", fulltext.connectedLocalSolr() ? localSolr.replace("collection1", "webgraph") : remoteSolr + "webgraph/select?&q=*:*&start=0&rows=3"); + prop.putNum("webgraphSize", fulltext.writeToWebgraph() ? fulltext.webgraphSize() : 0); + prop.putNum("webgraphSegmentCount", fulltext.writeToWebgraph() ? fulltext.getWebgraphConnector().getSegmentCount() : 0); + prop.putNum("citationSize", segment.citationCount()); + prop.putNum("citationSegmentCount", segment.citationSegmentCount()); + prop.putNum("rwipublictextSize", segment.RWICount()); + prop.putNum("rwipublictextSegmentCount", segment.RWISegmentCount()); + prop.put("list", "0"); prop.put("loaderSize", 0); prop.put("loaderMax", 0); diff --git a/htroot/api/status_p.java b/htroot/api/status_p.java index 6b21731ec..d9f52a3b2 100644 --- a/htroot/api/status_p.java +++ b/htroot/api/status_p.java @@ -37,6 +37,7 @@ import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.index.Fulltext; import net.yacy.search.index.Segment; import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.WebgraphSchema; @@ -55,11 +56,12 @@ public class status_p { final boolean html = post != null && post.containsKey("html"); prop.setLocalized(html); Segment segment = sb.index; + Fulltext fulltext = segment.fulltext(); prop.put("rejected", "0"); sb.updateMySeed(); final int cacheMaxSize = (int) sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 10000); - prop.putNum("ppm", Switchboard.currentPPM()); + prop.put("ppm", Switchboard.currentPPM()); // we don't format the ppm here because that will cause that the progress bar shows nothing if the number is > 999 prop.putNum("qpm", sb.peers.mySeed().getQPM()); prop.putNum("wordCacheSize", segment.RWIBufferCount()); prop.putNum("wordCacheMaxSize", cacheMaxSize); @@ -77,10 +79,10 @@ public class status_p { prop.put("trafficCrawler", ByteCount.getAccountCount(ByteCount.CRAWLER)); // index size - prop.putNum("urlpublictextSize", segment.fulltext().collectionSize()); - prop.putNum("urlpublictextSegmentCount", segment.fulltext().getDefaultConnector().getSegmentCount()); - prop.putNum("webgraphSize", segment.fulltext().writeToWebgraph() ? segment.fulltext().webgraphSize() : 0); - prop.putNum("webgraphSegmentCount", segment.fulltext().writeToWebgraph() ? segment.fulltext().getWebgraphConnector().getSegmentCount() : 0); + prop.putNum("urlpublictextSize", fulltext.collectionSize()); + prop.putNum("urlpublictextSegmentCount", fulltext.getDefaultConnector().getSegmentCount()); + prop.putNum("webgraphSize", fulltext.writeToWebgraph() ? fulltext.webgraphSize() : 0); + prop.putNum("webgraphSegmentCount", fulltext.writeToWebgraph() ? fulltext.getWebgraphConnector().getSegmentCount() : 0); prop.putNum("citationSize", segment.citationCount()); prop.putNum("citationSegmentCount", segment.citationSegmentCount()); prop.putNum("rwipublictextSize", segment.RWICount()); @@ -143,11 +145,6 @@ public class status_p { int collectionRemainingTimeMinutes = (int) (collectionRemainingTime / 60000); int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000); - prop.put("postprocessingCollectionRemainingCount", collectionRemainingCount); - prop.put("postprocessingRunning_collectionSpeed", collectionSpeed); - prop.put("postprocessingRunning_collectionRemainingTimeMinutes", collectionRemainingTimeMinutes); - prop.put("postprocessingRunning_collectionRemainingTimeSeconds", collectionRemainingTimeSeconds); - long webgraphRemainingCount = 0; if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0; @@ -156,10 +153,14 @@ public class status_p { int webgraphRemainingTimeMinutes = (int) (webgraphRemainingTime / 60000); int webgraphRemainingTimeSeconds = (int) ((webgraphRemainingTime - (webgraphRemainingTimeMinutes * 60000)) / 1000); + prop.put("postprocessingCollectionRemainingCount", collectionRemainingCount); prop.put("postprocessingWebgraphRemainingCount", webgraphRemainingCount); - prop.put("postprocessingRunning_webgraphSpeed", webgraphSpeed); - prop.put("postprocessingRunning_webgraphRemainingTimeMinutes", webgraphRemainingTimeMinutes); - prop.put("postprocessingRunning_webgraphRemainingTimeSeconds", webgraphRemainingTimeSeconds); + prop.put("postprocessingRunning_activity", collectionTimeSinceStart > 0 ? "collection" : "webgraph"); + prop.put("postprocessingSpeed", collectionTimeSinceStart > 0 ? collectionSpeed : webgraphSpeed); + prop.put("postprocessingElapsedTime", collectionTimeSinceStart > 0 ? collectionTimeSinceStart : webgraphTimeSinceStart); + prop.put("postprocessingRemainingTime", collectionTimeSinceStart > 0 ? collectionRemainingTime : webgraphRemainingTime); + prop.put("postprocessingRemainingTimeMinutes", collectionTimeSinceStart > 0 ? collectionRemainingTimeMinutes : webgraphRemainingTimeMinutes); + prop.put("postprocessingRemainingTimeSeconds", collectionTimeSinceStart > 0 ? collectionRemainingTimeSeconds : webgraphRemainingTimeSeconds); // return rewrite properties return prop; diff --git a/htroot/api/status_p.xml b/htroot/api/status_p.xml index 83dd9f00b..9da652ada 100644 --- a/htroot/api/status_p.xml +++ b/htroot/api/status_p.xml @@ -74,16 +74,12 @@ #[postprocessingCollectionRemainingCount]# #[postprocessingWebgraphRemainingCount]# - #(postprocessingRunning)# - idle:: - busy - #[collectionSpeed]# - #[collectionRemainingTimeMinutes]# - #[collectionRemainingTimeSeconds]# - #[webgraphSpeed]# - #[webgraphRemainingTimeMinutes]# - #[webgraphRemainingTimeSeconds]# - #(/postprocessingRunning)# + #(postprocessingRunning)#idle::busy:#[activity]##(/postprocessingRunning)# + #[postprocessingSpeed]# + #[postprocessingElapsedTime]# + #[postprocessingRemainingTime]# + #[postprocessingRemainingTimeMinutes]# + #[postprocessingRemainingTimeSeconds]# \ No newline at end of file diff --git a/htroot/js/Crawler.js b/htroot/js/Crawler.js index 0d20138db..60b098357 100644 --- a/htroot/js/Crawler.js +++ b/htroot/js/Crawler.js @@ -108,9 +108,16 @@ function handleStatus(){ postprocessing=getFirstChild(statusTag, "postprocessing"); document.getElementById("postprocessing_status").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status")); - document.getElementById("postprocessing_collection").firstChild.nodeValue="collection: " + getValue(getFirstChild(postprocessing, "collectionRemainingCount")); - document.getElementById("postprocessing_webgraph").firstChild.nodeValue="webgraph: " + getValue(getFirstChild(postprocessing, "webgraphRemainingCount")); - document.getElementById("postprocessing_time").firstChild.nodeValue=""; + document.getElementById("postprocessing_collection").firstChild.nodeValue="pending in collection: " + getValue(getFirstChild(postprocessing, "collectionRemainingCount")); + document.getElementById("postprocessing_webgraph").firstChild.nodeValue="pending in webgraph: " + getValue(getFirstChild(postprocessing, "webgraphRemainingCount")); + document.getElementById("postprocessing_remainingTimeMinutes").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeMinutes")); + document.getElementById("postprocessing_remainingTimeSeconds").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeSeconds")); + postprocessingElapsedTime=getValue(getFirstChild(postprocessing, "postprocessingElapsedTime")); + postprocessingRemainingTime=getValue(getFirstChild(postprocessing, "postprocessingRemainingTime")); + p = 100 * postprocessingElapsedTime / (postprocessingElapsedTime + postprocessingRemainingTime); + bar=""; + document.getElementById("postprocessing_bar").firstChild.nodeValue=bar; + //document.getElementById("postprocessing_speed").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "speed")); load=getFirstChild(statusTag, "load"); document.getElementById("load").firstChild.nodeValue=getValue(load); diff --git a/source/net/yacy/document/parser/pdfParser.java b/source/net/yacy/document/parser/pdfParser.java index d74114180..812674b7a 100644 --- a/source/net/yacy/document/parser/pdfParser.java +++ b/source/net/yacy/document/parser/pdfParser.java @@ -255,7 +255,7 @@ public class pdfParser extends AbstractParser implements Parser { if (clearResources != null) clearResources.invoke(null); } } catch (Throwable e) { - e.printStackTrace(); + //e.printStackTrace(); } } } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 5a2ae8319..f618481ef 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2368,8 +2368,8 @@ public final class Switchboard extends serverSwitch { log.info("cleanup post-processed " + proccount + " documents"); } } - postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0 - + this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring + postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0 postprocessingRunning = false; }