more monitoring for postprocessing

pull/1/head
Michael Peter Christen 11 years ago
parent 6842783761
commit fceac8cffd

@ -21,7 +21,7 @@
#%env/templates/submenuCrawlMonitor.template%#
<h2>Crawler</h2>
<noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
<fieldset style="width:180px;height:160px;float:left;">
<fieldset style="width:180px;height:180px;float:left;">
<legend>Queues</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody>
@ -75,12 +75,12 @@
</table>
<div class="warning" id="message">&nbsp;#[queuemessage]#</div>
</fieldset>
<fieldset style="width:220px;height:160px;float:left;">
<fieldset style="width:220px;height:180px;float:left;">
<legend>Index Size</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody>
<tr class="TableHeader">
<th width="140">Database<br/>&nbsp;</th>
<th width="160">Database<br/>&nbsp;</th>
<th width="80">Entries<br/>&nbsp;</th>
<th width="40">Seg-<br/>ments</th>
</tr>
@ -94,6 +94,11 @@
<td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
<td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
</tr>
<tr class="TableCellLight">
<td align="left">Citations<br/>(reverse link index)</td>
<td align="right"><span id="citationSize">#[citationSize]#</span></td>
<td align="right"><span id="citationSegmentCount">#[citationSegmentCount]#</span></td>
</tr>
<tr class="TableCellLight">
<td align="left">RWIs<br/>(P2P Chunks)</td>
<td align="right"><span id="rwipublictextSize">#[rwipublictextSize]#</span></td>
@ -102,7 +107,7 @@
</tbody>
</table>
</fieldset>
<fieldset style="width:430px;height:160px;;float:left;">
<fieldset style="width:430px;height:180px;;float:left;">
<legend>Progress</legend>
<form action="Crawler_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
@ -134,9 +139,14 @@
<tr class="TableCellLight">
<td align="left">Load</td>
<td align="left"><span id="load">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left" width="100">Postprocessing</td>
<td align="left"><span id="postprocessing">&nbsp;&nbsp;&nbsp;</span></td>
<td>&nbsp;</td>
<td colspan="3">&nbsp;</td>
</tr>
<tr class="TableCellLight">
<td align="left">Postprocessing</td>
<td align="left"><span id="postprocessing_status">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_collection">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_webgraph">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="postprocessing_time">&nbsp;&nbsp;&nbsp;</span></td>
</tr>
</tbody>
</table>

@ -25,6 +25,8 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.Memory;
import net.yacy.crawler.CrawlSwitchboard;
@ -36,6 +38,8 @@ import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -77,6 +81,8 @@ public class status_p {
prop.putNum("urlpublictextSegmentCount", segment.fulltext().getDefaultConnector().getSegmentCount());
prop.putNum("webgraphSize", segment.fulltext().writeToWebgraph() ? segment.fulltext().webgraphSize() : 0);
prop.putNum("webgraphSegmentCount", segment.fulltext().writeToWebgraph() ? segment.fulltext().getWebgraphConnector().getSegmentCount() : 0);
prop.putNum("citationSize", segment.citationCount());
prop.putNum("citationSegmentCount", segment.citationSegmentCount());
prop.putNum("rwipublictextSize", segment.RWICount());
prop.putNum("rwipublictextSegmentCount", segment.RWISegmentCount());
@ -123,6 +129,38 @@ public class status_p {
// ---- postprocessing monitoring ----
// Template switch: 1 while Switchboard reports postprocessing as running, 0 otherwise.
prop.put("postprocessingRunning", Switchboard.postprocessingRunning ? 1 : 0);

// The collection core is monitored only if its schema contains process_sxt and there is
// either a connected citation index or a webgraph being written; the webgraph core only
// if its schema contains process_sxt and the webgraph is being written.
boolean processCollection = sb.index.fulltext().getDefaultConfiguration().contains(CollectionSchema.process_sxt)
        && (sb.index.connectedCitation() || sb.index.fulltext().writeToWebgraph());
boolean processWebgraph = sb.index.fulltext().getWebgraphConfiguration().contains(WebgraphSchema.process_sxt)
        && sb.index.fulltext().writeToWebgraph();

// Elapsed milliseconds since the recorded start time (slot 0 = collection, slot 1 = webgraph);
// 0 whenever postprocessing is idle or the core is not monitored.
long collectionTimeSinceStart = processCollection && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[0] : 0;
long webgraphTimeSinceStart = processWebgraph && Switchboard.postprocessingRunning ? System.currentTimeMillis() - Switchboard.postprocessingStartTime[1] : 0;

// -- collection core --
// Remaining work = number of documents matched by "<process_sxt>:[* TO *]".
long collectionRemainingCount = 0;
if (processCollection) {
    try {
        collectionRemainingCount = sb.index.fulltext().getDefaultConnector().getCountByQuery(CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]");
    } catch (IOException ignored) {
        // Best effort: this is a status page, so a failing count query must not break it.
        // The remaining count stays 0 in that case.
    }
}
// NOTE(review): this difference can become negative if the remaining count exceeds
// Switchboard.postprocessingCount[0]; speed and remaining time are then negative too.
long collectionCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[0] - collectionRemainingCount : 0;
int collectionSpeed = collectionTimeSinceStart == 0 ? 0 : (int) (60000 * collectionCountSinceStart / collectionTimeSinceStart); // pages per minute
long collectionRemainingTime = collectionSpeed == 0 ? 0 : 60000 * collectionRemainingCount / collectionSpeed; // millis
int collectionRemainingTimeMinutes = (int) (collectionRemainingTime / 60000);
// Seconds part via long modulo: multiplying the int minutes by 60000 overflows int
// as soon as the estimate exceeds 35791 minutes.
int collectionRemainingTimeSeconds = (int) ((collectionRemainingTime % 60000) / 1000);
prop.put("postprocessingCollectionRemainingCount", collectionRemainingCount);
prop.put("postprocessingRunning_collectionSpeed", collectionSpeed);
prop.put("postprocessingRunning_collectionRemainingTimeMinutes", collectionRemainingTimeMinutes);
prop.put("postprocessingRunning_collectionRemainingTimeSeconds", collectionRemainingTimeSeconds);

// -- webgraph core (same computation against the webgraph connector and slot 1) --
long webgraphRemainingCount = 0;
if (processWebgraph) {
    try {
        webgraphRemainingCount = sb.index.fulltext().getWebgraphConnector().getCountByQuery(WebgraphSchema.process_sxt.getSolrFieldName() + ":[* TO *]");
    } catch (IOException ignored) {
        // Best effort, see the collection core above in this block: keep 0 on failure.
    }
}
long webgraphCountSinceStart = Switchboard.postprocessingRunning ? Switchboard.postprocessingCount[1] - webgraphRemainingCount : 0;
int webgraphSpeed = webgraphTimeSinceStart == 0 ? 0 : (int) (60000 * webgraphCountSinceStart / webgraphTimeSinceStart); // pages per minute
long webgraphRemainingTime = webgraphSpeed == 0 ? 0 : 60000 * webgraphRemainingCount / webgraphSpeed; // millis
int webgraphRemainingTimeMinutes = (int) (webgraphRemainingTime / 60000);
// Long modulo for the same overflow reason as in the collection branch.
int webgraphRemainingTimeSeconds = (int) ((webgraphRemainingTime % 60000) / 1000);
prop.put("postprocessingWebgraphRemainingCount", webgraphRemainingCount);
prop.put("postprocessingRunning_webgraphSpeed", webgraphSpeed);
prop.put("postprocessingRunning_webgraphRemainingTimeMinutes", webgraphRemainingTimeMinutes);
prop.put("postprocessingRunning_webgraphRemainingTimeSeconds", webgraphRemainingTimeSeconds);
// return rewrite properties
return prop;
}

@ -25,6 +25,8 @@
<urlpublictextSegmentCount>#[urlpublictextSegmentCount]#</urlpublictextSegmentCount>
<webgraph>#[webgraphSize]#</webgraph>
<webgraphSegmentCount>#[webgraphSegmentCount]#</webgraphSegmentCount>
<citation>#[citationSize]#</citation>
<citationSegmentCount>#[citationSegmentCount]#</citationSegmentCount>
<rwipublictext>#[rwipublictextSize]#</rwipublictext>
<rwipublictextSegmentCount>#[rwipublictextSegmentCount]#</rwipublictextSegmentCount>
</dbsize>
@ -70,7 +72,18 @@
#(/crawlProfiles)#
<postprocessing>
<status>#(postprocessingRunning)#idle::busy#(/postprocessingRunning)#</status>
<collectionRemainingCount>#[postprocessingCollectionRemainingCount]#</collectionRemainingCount>
<webgraphRemainingCount>#[postprocessingWebgraphRemainingCount]#</webgraphRemainingCount>
#(postprocessingRunning)#
<status>idle</status>::
<status>busy</status>
<collectionSpeed>#[collectionSpeed]#</collectionSpeed>
<collectionRemainingTimeMinutes>#[collectionRemainingTimeMinutes]#</collectionRemainingTimeMinutes>
<collectionRemainingTimeSeconds>#[collectionRemainingTimeSeconds]#</collectionRemainingTimeSeconds>
<webgraphSpeed>#[webgraphSpeed]#</webgraphSpeed>
<webgraphRemainingTimeMinutes>#[webgraphRemainingTimeMinutes]#</webgraphRemainingTimeMinutes>
<webgraphRemainingTimeSeconds>#[webgraphRemainingTimeSeconds]#</webgraphRemainingTimeSeconds>
#(/postprocessingRunning)#
</postprocessing>
</status>

@ -93,17 +93,24 @@ function handleStatus(){
urlpublictextSegmentCount=getValue(getFirstChild(dbsize, "urlpublictextSegmentCount"));
webgraph=getValue(getFirstChild(dbsize, "webgraph"));
webgraphSegmentCount=getValue(getFirstChild(dbsize, "webgraphSegmentCount"));
citation=getValue(getFirstChild(dbsize, "citation"));
citationSegmentCount=getValue(getFirstChild(dbsize, "citationSegmentCount"));
rwipublictext=getValue(getFirstChild(dbsize, "rwipublictext"));
rwipublictextSegmentCount=getValue(getFirstChild(dbsize, "rwipublictextSegmentCount"));
document.getElementById("urlpublictextSize").firstChild.nodeValue=urlpublictext;
document.getElementById("urlpublictextSegmentCount").firstChild.nodeValue=urlpublictextSegmentCount;
document.getElementById("webgraphSize").firstChild.nodeValue=webgraph;
document.getElementById("webgraphSegmentCount").firstChild.nodeValue=webgraphSegmentCount;
document.getElementById("citationSize").firstChild.nodeValue=citation;
document.getElementById("citationSegmentCount").firstChild.nodeValue=citationSegmentCount;
document.getElementById("rwipublictextSize").firstChild.nodeValue=rwipublictext;
document.getElementById("rwipublictextSegmentCount").firstChild.nodeValue=rwipublictextSegmentCount;
postprocessing=getFirstChild(statusTag, "postprocessing");
document.getElementById("postprocessing").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status"));
document.getElementById("postprocessing_status").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status"));
document.getElementById("postprocessing_collection").firstChild.nodeValue="collection: " + getValue(getFirstChild(postprocessing, "collectionRemainingCount"));
document.getElementById("postprocessing_webgraph").firstChild.nodeValue="webgraph: " + getValue(getFirstChild(postprocessing, "webgraphRemainingCount"));
document.getElementById("postprocessing_time").firstChild.nodeValue="";
load=getFirstChild(statusTag, "load");
document.getElementById("load").firstChild.nodeValue=getValue(load);

@ -196,6 +196,14 @@ public class Segment {
this.urlCitationIndex = null;
}
/**
 * Reports the size of the citation index.
 *
 * @return {@code urlCitationIndex.size()}, or 0 while {@code urlCitationIndex} is {@code null}
 */
public int citationCount() {
    if (this.urlCitationIndex == null) {
        return 0;
    }
    return this.urlCitationIndex.size();
}
/**
 * Reports the segment count of the citation index.
 *
 * @return {@code urlCitationIndex.getSegmentCount()}, or 0 while {@code urlCitationIndex} is {@code null}
 */
public long citationSegmentCount() {
    if (this.urlCitationIndex == null) {
        return 0;
    }
    return this.urlCitationIndex.getSegmentCount();
}
/**
 * Connects the URL database by delegating to {@code fulltext.connectUrlDb},
 * using {@code UrlDbName} as the database name.
 *
 * @param useTailCache forwarded unchanged to the fulltext index
 * @param exceed134217727 forwarded unchanged to the fulltext index
 */
public void connectUrlDb(final boolean useTailCache, final boolean exceed134217727) {
    this.fulltext.connectUrlDb(
            UrlDbName,
            useTailCache,
            exceed134217727);
}

Loading…
Cancel
Save