more monitoring for postprocessing and enhanced layout in Crawler

monitor page
pull/1/head
orbiter 11 years ago
parent 9cf9727685
commit 19a051bec8

@ -21,7 +21,7 @@
#%env/templates/submenuCrawlMonitor.template%# #%env/templates/submenuCrawlMonitor.template%#
<h2>Crawler</h2> <h2>Crawler</h2>
<noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript> <noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
<fieldset style="width:180px;height:180px;float:left;"> <fieldset style="width:180px;height:190px;float:left;">
<legend>Queues</legend> <legend>Queues</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler"> <table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody> <tbody>
@ -75,7 +75,7 @@
</table> </table>
<div class="warning" id="message">&nbsp;#[queuemessage]#<div> <div class="warning" id="message">&nbsp;#[queuemessage]#<div>
</fieldset> </fieldset>
<fieldset style="width:220px;height:180px;float:left;"> <fieldset style="width:270px;height:190px;float:left;">
<legend>Index Size</legend> <legend>Index Size</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler"> <table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody> <tbody>
@ -85,12 +85,12 @@
<th width="40">Seg-<br/>ments</th> <th width="40">Seg-<br/>ments</th>
</tr> </tr>
<tr class="TableCellLight"> <tr class="TableCellLight">
<td align="left">Documents<br/><a href="/solr/select?core=collection1&q=*:*&start=0&rows=3">solr search api</a></td> <td align="left">Documents<br/><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
<td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td> <td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
<td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td> <td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td>
</tr> </tr>
<tr class="TableCellLight"> <tr class="TableCellLight">
<td align="left">Webgraph Edges<br/><a href="/solr/select?core=webgraph&q=*:*&start=0&rows=3">solr search api</a></td> <td align="left">Webgraph Edges<br/><a href="#[webgraphSolrURL]#">solr search api</a></td>
<td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td> <td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
<td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td> <td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
</tr> </tr>
@ -107,17 +107,17 @@
</tbody> </tbody>
</table> </table>
</fieldset> </fieldset>
<fieldset style="width:430px;height:180px;;float:left;"> <fieldset style="width:480px;height:190px;;float:left;">
<legend>Progress</legend> <legend>Progress</legend>
<form action="Crawler_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8"> <form action="Crawler_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler"> <table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody> <tbody>
<tr class="TableHeader"> <tr class="TableHeader">
<th width="100">Indicator<br/>&nbsp;</th> <th width="120">Indicator<br/>&nbsp;</th>
<th width="300" colspan="4">Level<br/>&nbsp;</th> <th width="300" colspan="4">Level<br/>&nbsp;</th>
</tr> </tr>
<tr class="TableCellLight"> <tr class="TableCellLight">
<td align="left">Speed</td> <td align="left">Speed / PPM<br/>(Pages Per Minute)</td>
<td align="left" colspan="4"> <td align="left" colspan="4">
<input #(crawlingSpeedMinChecked)#::class="TableCellDark"#(/crawlingSpeedMinChecked)# type="submit" name="crawlingPerformance" value="minimum" /> <input #(crawlingSpeedMinChecked)#::class="TableCellDark"#(/crawlingSpeedMinChecked)# type="submit" name="crawlingPerformance" value="minimum" />
<input #(crawlingSpeedCustChecked)#::class="TableCellDark"#(/crawlingSpeedCustChecked)# id="customPPM" name="customPPM" type="text" size="5" maxlength="5" value="#[customPPMdefault]#" />PPM <input type="submit" name="crawlingPerformance" value="custom" /> <input #(crawlingSpeedCustChecked)#::class="TableCellDark"#(/crawlingSpeedCustChecked)# id="customPPM" name="customPPM" type="text" size="5" maxlength="5" value="#[customPPMdefault]#" />PPM <input type="submit" name="crawlingPerformance" value="custom" />
@ -125,12 +125,24 @@
</td> </td>
</tr> </tr>
<tr class="TableCellLight"> <tr class="TableCellLight">
<td align="left">PPM (Pages Per Minute)</td> <td align="left">Crawler PPM</td>
<td align="left" width="40"><span id="ppmNum">&nbsp;&nbsp;&nbsp;</span></td> <td align="left" width="40"><span id="ppmNum">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left" width="260px" colspan="3"> <td align="left" width="260px" colspan="3">
<progress id="ppmbar" max="30000" value="0" style="width:94%;"/> <progress id="ppmbar" max="30000" value="0" style="width:94%;"/>
</td> </td>
</tr> </tr>
<tr class="TableCellLight">
<td align="left" valign="top" rowspan="2">Postprocessing Progress <span id="postprocessing_speed">&nbsp;</span></td>
<td align="left" width="40"><span id="postprocessing_status">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left" width="260px" colspan="3">
<span id="postprocessing_bar"><progress id="postprocessingBar" max="30000" value="0" style="width:94%;"/></span>
</td>
</tr>
<tr class="TableCellLight">
<td align="left"><span id="postprocessing_remainingTimeMinutes">0</span>:<span id="postprocessing_remainingTimeSeconds">0</span></td>
<td align="left"><span id="postprocessing_collection">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_webgraph">&nbsp;&nbsp;&nbsp;</span></td>
</tr>
<tr class="TableCellLight"> <tr class="TableCellLight">
<td align="left">Traffic (Crawler)</td> <td align="left">Traffic (Crawler)</td>
<td align="left"><span id="trafficCrawler">&nbsp;&nbsp;&nbsp;</span> MB</td> <td align="left"><span id="trafficCrawler">&nbsp;&nbsp;&nbsp;</span> MB</td>
@ -141,13 +153,6 @@
<td align="left"><span id="load">&nbsp;&nbsp;&nbsp;</span></td> <td align="left"><span id="load">&nbsp;&nbsp;&nbsp;</span></td>
<td colspan="3">&nbsp;</td> <td colspan="3">&nbsp;</td>
</tr> </tr>
<tr class="TableCellLight">
<td align="left">Postprocessing</td>
<td align="left"><span id="postprocessing_status">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_collection">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_webgraph">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_time">&nbsp;&nbsp;&nbsp;</span></td>
</tr>
</tbody> </tbody>
</table> </table>
</form> </form>

@ -55,6 +55,8 @@ import net.yacy.peers.NewsPool;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
@ -69,12 +71,23 @@ public class Crawler_p {
// initial values for AJAX Elements (without JavaScript) // initial values for AJAX Elements (without JavaScript)
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
prop.put("rejected", 0); prop.put("rejected", 0);
prop.put("urlpublictextSize", 0);
prop.put("urlpublictextSegmentCount", 0); Segment segment = sb.index;
prop.put("webgraphSize", 0); Fulltext fulltext = segment.fulltext();
prop.put("webgraphSegmentCount", 0); String localSolr = "/solr/select?core=collection1&q=*:*&start=0&rows=3";
prop.put("rwipublictextSize", 0); String remoteSolr = env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, localSolr);
prop.put("rwipublictextSegmentCount", 0); if (!remoteSolr.endsWith("/")) remoteSolr = remoteSolr + "/";
prop.put("urlpublictextSolrURL", fulltext.connectedLocalSolr() ? localSolr : remoteSolr + "collection1/select?&q=*:*&start=0&rows=3");
prop.putNum("urlpublictextSize", fulltext.collectionSize());
prop.putNum("urlpublictextSegmentCount", fulltext.getDefaultConnector().getSegmentCount());
prop.put("webgraphSolrURL", fulltext.connectedLocalSolr() ? localSolr.replace("collection1", "webgraph") : remoteSolr + "webgraph/select?&q=*:*&start=0&rows=3");
prop.putNum("webgraphSize", fulltext.writeToWebgraph() ? fulltext.webgraphSize() : 0);
prop.putNum("webgraphSegmentCount", fulltext.writeToWebgraph() ? fulltext.getWebgraphConnector().getSegmentCount() : 0);
prop.putNum("citationSize", segment.citationCount());
prop.putNum("citationSegmentCount", segment.citationSegmentCount());
prop.putNum("rwipublictextSize", segment.RWICount());
prop.putNum("rwipublictextSegmentCount", segment.RWISegmentCount());
prop.put("list", "0"); prop.put("list", "0");
prop.put("loaderSize", 0); prop.put("loaderSize", 0);
prop.put("loaderMax", 0); prop.put("loaderMax", 0);

@ -37,6 +37,7 @@ import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema; import net.yacy.search.schema.WebgraphSchema;
@ -55,11 +56,12 @@ public class status_p {
final boolean html = post != null && post.containsKey("html"); final boolean html = post != null && post.containsKey("html");
prop.setLocalized(html); prop.setLocalized(html);
Segment segment = sb.index; Segment segment = sb.index;
Fulltext fulltext = segment.fulltext();
prop.put("rejected", "0"); prop.put("rejected", "0");
sb.updateMySeed(); sb.updateMySeed();
final int cacheMaxSize = (int) sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 10000); final int cacheMaxSize = (int) sb.getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 10000);
prop.putNum("ppm", Switchboard.currentPPM()); prop.put("ppm", Switchboard.currentPPM()); // we don't format the ppm here because that will cause that the progress bar shows nothing if the number is > 999
prop.putNum("qpm", sb.peers.mySeed().getQPM()); prop.putNum("qpm", sb.peers.mySeed().getQPM());
prop.putNum("wordCacheSize", segment.RWIBufferCount()); prop.putNum("wordCacheSize", segment.RWIBufferCount());
prop.putNum("wordCacheMaxSize", cacheMaxSize); prop.putNum("wordCacheMaxSize", cacheMaxSize);
@ -77,10 +79,10 @@ public class status_p {
prop.put("trafficCrawler", ByteCount.getAccountCount(ByteCount.CRAWLER)); prop.put("trafficCrawler", ByteCount.getAccountCount(ByteCount.CRAWLER));
// index size // index size
prop.putNum("urlpublictextSize", segment.fulltext().collectionSize()); prop.putNum("urlpublictextSize", fulltext.collectionSize());
prop.putNum("urlpublictextSegmentCount", segment.fulltext().getDefaultConnector().getSegmentCount()); prop.putNum("urlpublictextSegmentCount", fulltext.getDefaultConnector().getSegmentCount());
prop.putNum("webgraphSize", segment.fulltext().writeToWebgraph() ? segment.fulltext().webgraphSize() : 0); prop.putNum("webgraphSize", fulltext.writeToWebgraph() ? fulltext.webgraphSize() : 0);
prop.putNum("webgraphSegmentCount", segment.fulltext().writeToWebgraph() ? segment.fulltext().getWebgraphConnector().getSegmentCount() : 0); prop.putNum("webgraphSegmentCount", fulltext.writeToWebgraph() ? fulltext.getWebgraphConnector().getSegmentCount() : 0);
prop.putNum("citationSize", segment.citationCount()); prop.putNum("citationSize", segment.citationCount());
prop.putNum("citationSegmentCount", segment.citationSegmentCount()); prop.putNum("citationSegmentCount", segment.citationSegmentCount());
prop.putNum("rwipublictextSize", segment.RWICount()); prop.putNum("rwipublictextSize", segment.RWICount());
@ -143,11 +145,6 @@ public class status_p {
int collectionRemainingTimeMinutes = (int) (collectionRemainingTime / 60000); int collectionRemainingTimeMinutes = (int) (collectionRemainingTime / 60000);
int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000); int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime - (collectionRemainingTimeMinutes * 60000)) / 1000);
prop.put("postprocessingCollectionRemainingCount", collectionRemainingCount);
prop.put("postprocessingRunning_collectionSpeed", collectionSpeed);
prop.put("postprocessingRunning_collectionRemainingTimeMinutes", collectionRemainingTimeMinutes);
prop.put("postprocessingRunning_collectionRemainingTimeSeconds", collectionRemainingTimeSeconds);
long webgraphRemainingCount = 0; long webgraphRemainingCount = 0;
if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {} if (processWebgraph) try {webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");} catch (IOException e) {}
long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0; long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0;
@ -156,10 +153,14 @@ public class status_p {
int webgraphRemainingTimeMinutes = (int) (webgraphRemainingTime / 60000); int webgraphRemainingTimeMinutes = (int) (webgraphRemainingTime / 60000);
int webgraphRemainingTimeSeconds = (int) ((webgraphRemainingTime - (webgraphRemainingTimeMinutes * 60000)) / 1000); int webgraphRemainingTimeSeconds = (int) ((webgraphRemainingTime - (webgraphRemainingTimeMinutes * 60000)) / 1000);
prop.put("postprocessingCollectionRemainingCount", collectionRemainingCount);
prop.put("postprocessingWebgraphRemainingCount", webgraphRemainingCount); prop.put("postprocessingWebgraphRemainingCount", webgraphRemainingCount);
prop.put("postprocessingRunning_webgraphSpeed", webgraphSpeed); prop.put("postprocessingRunning_activity", collectionTimeSinceStart > 0 ? "collection" : "webgraph");
prop.put("postprocessingRunning_webgraphRemainingTimeMinutes", webgraphRemainingTimeMinutes); prop.put("postprocessingSpeed", collectionTimeSinceStart > 0 ? collectionSpeed : webgraphSpeed);
prop.put("postprocessingRunning_webgraphRemainingTimeSeconds", webgraphRemainingTimeSeconds); prop.put("postprocessingElapsedTime", collectionTimeSinceStart > 0 ? collectionTimeSinceStart : webgraphTimeSinceStart);
prop.put("postprocessingRemainingTime", collectionTimeSinceStart > 0 ? collectionRemainingTime : webgraphRemainingTime);
prop.put("postprocessingRemainingTimeMinutes", collectionTimeSinceStart > 0 ? collectionRemainingTimeMinutes : webgraphRemainingTimeMinutes);
prop.put("postprocessingRemainingTimeSeconds", collectionTimeSinceStart > 0 ? collectionRemainingTimeSeconds : webgraphRemainingTimeSeconds);
// return rewrite properties // return rewrite properties
return prop; return prop;

@ -74,16 +74,12 @@
<postprocessing> <postprocessing>
<collectionRemainingCount>#[postprocessingCollectionRemainingCount]#</collectionRemainingCount> <collectionRemainingCount>#[postprocessingCollectionRemainingCount]#</collectionRemainingCount>
<webgraphRemainingCount>#[postprocessingWebgraphRemainingCount]#</webgraphRemainingCount> <webgraphRemainingCount>#[postprocessingWebgraphRemainingCount]#</webgraphRemainingCount>
#(postprocessingRunning)# <status>#(postprocessingRunning)#idle::busy:#[activity]##(/postprocessingRunning)#</status>
<status>idle</status>:: <speed>#[postprocessingSpeed]#</speed>
<status>busy</status> <elapsedTime>#[postprocessingElapsedTime]#</elapsedTime>
<collectionSpeed>#[collectionSpeed]#</collectionSpeed> <remainingTime>#[postprocessingRemainingTime]#</remainingTime>
<collectionRemainingTimeMinutes>#[collectionRemainingTimeMinutes]#</collectionRemainingTimeMinutes> <remainingTimeMinutes>#[postprocessingRemainingTimeMinutes]#</remainingTimeMinutes>
<collectionRemainingTimeSeconds>#[collectionRemainingTimeSeconds]#</collectionRemainingTimeSeconds> <remainingTimeSeconds>#[postprocessingRemainingTimeSeconds]#</remainingTimeSeconds>
<webgraphSpeed>#[webgraphSpeed]#</webgraphSpeed>
<webgraphRemainingTimeMinutes>#[webgraphRemainingTimeMinutes]#</webgraphRemainingTimeMinutes>
<webgraphRemainingTimeSeconds>#[webgraphRemainingTimeSeconds]#</webgraphRemainingTimeSeconds>
#(/postprocessingRunning)#
</postprocessing> </postprocessing>
</status> </status>

@ -108,9 +108,16 @@ function handleStatus(){
postprocessing=getFirstChild(statusTag, "postprocessing"); postprocessing=getFirstChild(statusTag, "postprocessing");
document.getElementById("postprocessing_status").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status")); document.getElementById("postprocessing_status").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status"));
document.getElementById("postprocessing_collection").firstChild.nodeValue="collection: " + getValue(getFirstChild(postprocessing, "collectionRemainingCount")); document.getElementById("postprocessing_collection").firstChild.nodeValue="pending in collection: " + getValue(getFirstChild(postprocessing, "collectionRemainingCount"));
document.getElementById("postprocessing_webgraph").firstChild.nodeValue="webgraph: " + getValue(getFirstChild(postprocessing, "webgraphRemainingCount")); document.getElementById("postprocessing_webgraph").firstChild.nodeValue="pending in webgraph: " + getValue(getFirstChild(postprocessing, "webgraphRemainingCount"));
document.getElementById("postprocessing_time").firstChild.nodeValue=""; document.getElementById("postprocessing_remainingTimeMinutes").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeMinutes"));
document.getElementById("postprocessing_remainingTimeSeconds").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeSeconds"));
// Update the postprocessing progress bar.
// The status XML exposes the timings as <elapsedTime> and <remainingTime>
// inside <postprocessing>; the longer "postprocessing..." names are only the
// server-side template keys and never appear as tag names.
postprocessingElapsedTime=parseInt(getValue(getFirstChild(postprocessing, "elapsedTime")), 10);
postprocessingRemainingTime=parseInt(getValue(getFirstChild(postprocessing, "remainingTime")), 10);
// Treat missing or non-numeric values as "nothing known yet".
if (isNaN(postprocessingElapsedTime)) postprocessingElapsedTime = 0;
if (isNaN(postprocessingRemainingTime)) postprocessingRemainingTime = 0;
// Both values are numbers now, so "+" adds instead of concatenating strings.
// NOTE(review): assumes both timings use the same unit - confirm against status_p.
var postprocessingTotalTime = postprocessingElapsedTime + postprocessingRemainingTime;
// Percentage done; 0 while no time is known (avoids 0/0), clamped to 0..100.
p = postprocessingTotalTime > 0 ? 100 * postprocessingElapsedTime / postprocessingTotalTime : 0;
p = Math.max(0, Math.min(100, p));
// Update the existing <progress> element in place. Assigning markup to a
// node's nodeValue does not create elements, and the first child of
// "postprocessing_bar" is the <progress> element itself, not a text node.
var postprocessingBarElement = document.getElementById("postprocessingBar");
if (postprocessingBarElement) {
postprocessingBarElement.setAttribute("max", "100");
postprocessingBarElement.setAttribute("value", String(p));
}
//document.getElementById("postprocessing_speed").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "speed"));
load=getFirstChild(statusTag, "load"); load=getFirstChild(statusTag, "load");
document.getElementById("load").firstChild.nodeValue=getValue(load); document.getElementById("load").firstChild.nodeValue=getValue(load);

@ -255,7 +255,7 @@ public class pdfParser extends AbstractParser implements Parser {
if (clearResources != null) clearResources.invoke(null); if (clearResources != null) clearResources.invoke(null);
} }
} catch (Throwable e) { } catch (Throwable e) {
e.printStackTrace(); //e.printStackTrace();
} }
} }
} }

@ -2368,8 +2368,8 @@ public final class Switchboard extends serverSwitch {
log.info("cleanup post-processed " + proccount + " documents"); log.info("cleanup post-processed " + proccount + " documents");
} }
} }
postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0 this.index.fulltext().commit(true); // without a commit the success is not visible in the monitoring
postprocessingStartTime = new long[]{0,0}; // the start time for the processing; not started = 0
postprocessingRunning = false; postprocessingRunning = false;
} }

Loading…
Cancel
Save