Merge pull request #76 from luccioman/crawler

Crawl monitoring : refresh running crawls table
pull/77/head
luccioman 8 years ago committed by GitHub
commit 34658ddb9b

@ -1,4 +1,4 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Crawler</title>
@ -28,13 +28,14 @@
<fieldset id="queues" style="width:210px;float:left;">
<legend>Queues</legend>
<table border="0" class="watchCrawler">
<tbody>
<thead>
<tr class="TableHeader">
<th width="120">Queue<br/>&nbsp;</th>
<th width="60">Size<br/>&nbsp;</th>
<th width="30"><span class="glyphicon glyphicon-wrench"></span>
</th>
<th width="30"><span class="glyphicon glyphicon-wrench"></span></th>
</tr>
</thead>
<tbody>
<tr class="TableCellLight">
<td align="left"><a href="IndexCreateQueues_p.html?stack=LOCAL">Local Crawler</a></td>
<td align="right"><span id="localcrawlerqueuesize">#[localCrawlSize]#</span></td>
@ -88,12 +89,14 @@
<fieldset id="indexsize" style="width:240px;float:left;">
<legend>Index Size</legend>
<table border="0" class="watchCrawler">
<tbody>
<thead>
<tr class="TableHeader">
<th width="130">Database<br/>&nbsp;</th>
<th width="50">Entries<br/>&nbsp;</th>
<th width="40">Seg-<br/>ments</th>
</tr>
</thead>
<tbody>
<tr class="TableCellLight">
<td align="left">Documents<br/><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
<td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
@ -121,11 +124,13 @@
<legend>Progress</legend>
<form action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
<table border="0" class="watchCrawler">
<tbody>
<thead>
<tr class="TableHeader">
<th width="160">Indicator<br/>&nbsp;</th>
<th width="300" colspan="4">Level<br/>&nbsp;</th>
</tr>
</thead>
<tbody>
<tr class="TableCellLight">
<td align="left">Speed / PPM<br/>(Pages Per Minute)</td>
<td align="left" colspan="4">
@ -219,33 +224,39 @@ window.setInterval("setTableSize()", 1000);
<!-- crawl profile list -->
#(crawlProfilesShow)#::
<fieldset>
<legend>Running Crawls (#[count]#)</legend>
<legend id="runningCrawlsLegend">Running Crawls (#[count]#)</legend>
<table width="96%">
<tr><td>
<table border="0" summary="A list of crawl profiles and their current settings.">
<table border="0" summary="A list of crawl profiles and their current settings." id="crawlProfiles">
<colgroup>
<col width="16" />
<col width="140"/>
</colgroup>
<thead>
<tr class="TableHeader">
<td><strong>Name</strong></td>
#(debug)#::<td><strong>Count</strong></td>#(/debug)#
<td><strong>Status</strong></td>
<th><strong>Name</strong></th>
#(debug)#::<th id="headerDebug"><strong>Count</strong></th>#(/debug)#
<th><strong>Status</strong></th>
</tr>
</thead>
<tbody>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<tr class="TableCell#(dark)#Light::Dark#(/dark)#" id="#[handle]#">
<td>#[name]#</td>
#(debug)#::<td>#[count]#</td>#(/debug)#
<td>#(terminateButton)#::
<div style="text-decoration:blink;float:left;">Running</div>
<form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>
<td id="#[handle]#_status_cell">#(terminateButton)#::
<div id="#[handle]#_status" style="text-decoration:blink;float:left;">Running</div>
<form id="#[handle]#_terminate" style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
<div>
<input type="hidden" name="handle" value="#[handle]#" />
<input type="submit" name="terminate" value="Terminate" class="btn btn-danger btn-xs"/>
</div></form>
</div>
</form>
#(/terminateButton)#
</td>
</tr>
#{/list}#
</tbody>
</table>
</td>
#(linkstructure)#

@ -28,13 +28,35 @@ var refreshInterval=2;
var wait=0;
var changing=false; //change the interval
var statusLoaded=true;
/* Running crawls table DOM element */
var crawlsTable;
/* Size of the running crawls table header */
var crawlsHeadLength;
/* Running crawls legend DOM element */
var runningCrawlsLegend;
/* true when debug is enabled */
var debug;
/**
 * Page initialisation: resolves the DOM elements used to refresh the running
 * crawls table, triggers a first refresh, then starts the one-second
 * countdown timer whose id is stored in countInterval.
 */
function initCrawler(){
    initCrawlProfiles();
    refresh();
    //loadInterval=window.setInterval("refresh()", refreshInterval*1000);
    /* A function is passed instead of a code string, so nothing is evaluated
       eval-style on each tick. The wrapper keeps countdown() resolved at tick
       time, exactly as the string form did. */
    countInterval = window.setInterval(function() {
        countdown();
    }, 1000);
}
/**
 * Resolve once the DOM elements and flags needed to refresh the running
 * crawls table: debug flag, table element, header rows count and legend.
 * The header rows count is only set when the table is present.
 */
function initCrawlProfiles() {
    /* The debug "Count" column header is only rendered when debug is enabled */
    var debugHeader = document.getElementById("headerDebug");
    debug = debugHeader != null;

    crawlsTable = document.getElementById("crawlProfiles");
    if (crawlsTable != null && crawlsTable.rows != null) {
        if (crawlsTable.tHead != null) {
            crawlsHeadLength = crawlsTable.tHead.rows.length;
        } else {
            crawlsHeadLength = 0;
        }
    }

    runningCrawlsLegend = document.getElementById("runningCrawlsLegend");
}
function changeInterval(){
if(!changing){
window.clearInterval(countInterval);
@ -84,9 +106,9 @@ function handleStatus(){
return;
}
var statusResponse = statusRPC.responseXML;
statusTag=getFirstChild(statusResponse, "status");
var statusTag = getFirstChild(statusResponse, "status");
ppm=getValue(getFirstChild(statusTag, "ppm"));
var ppm = getValue(getFirstChild(statusTag, "ppm"));
var ppmNum = document.getElementById("ppmNum");
removeAllChildren(ppmNum);
@ -102,21 +124,21 @@ function handleStatus(){
// ppmBar end
// traffic output (no bar up to now)
traffic = getFirstChild(statusTag, "traffic");
trafficCrawlerValue = getValue(getFirstChild(traffic, "crawler"));
trafCrawlerSpan = document.getElementById("trafficCrawler");
var traffic = getFirstChild(statusTag, "traffic");
var trafficCrawlerValue = getValue(getFirstChild(traffic, "crawler"));
var trafCrawlerSpan = document.getElementById("trafficCrawler");
removeAllChildren(trafCrawlerSpan);
trafCrawlerSpan.appendChild(document.createTextNode(Math.round((trafficCrawlerValue) / 1024 / 10.24) / 100));
dbsize=getFirstChild(statusTag, "dbsize");
urlpublictext=getValue(getFirstChild(dbsize, "urlpublictext"));
urlpublictextSegmentCount=getValue(getFirstChild(dbsize, "urlpublictextSegmentCount"));
webgraph=getValue(getFirstChild(dbsize, "webgraph"));
webgraphSegmentCount=getValue(getFirstChild(dbsize, "webgraphSegmentCount"));
citation=getValue(getFirstChild(dbsize, "citation"));
citationSegmentCount=getValue(getFirstChild(dbsize, "citationSegmentCount"));
rwipublictext=getValue(getFirstChild(dbsize, "rwipublictext"));
rwipublictextSegmentCount=getValue(getFirstChild(dbsize, "rwipublictextSegmentCount"));
var dbsize = getFirstChild(statusTag, "dbsize");
var urlpublictext = getValue(getFirstChild(dbsize, "urlpublictext"));
var urlpublictextSegmentCount = getValue(getFirstChild(dbsize, "urlpublictextSegmentCount"));
var webgraph = getValue(getFirstChild(dbsize, "webgraph"));
var webgraphSegmentCount = getValue(getFirstChild(dbsize, "webgraphSegmentCount"));
var citation = getValue(getFirstChild(dbsize, "citation"));
var citationSegmentCount = getValue(getFirstChild(dbsize, "citationSegmentCount"));
var rwipublictext = getValue(getFirstChild(dbsize, "rwipublictext"));
var rwipublictextSegmentCount = getValue(getFirstChild(dbsize, "rwipublictextSegmentCount"));
document.getElementById("urlpublictextSize").firstChild.nodeValue=urlpublictext;
document.getElementById("urlpublictextSegmentCount").firstChild.nodeValue=urlpublictextSegmentCount;
document.getElementById("webgraphSize").firstChild.nodeValue=webgraph;
@ -126,54 +148,222 @@ function handleStatus(){
document.getElementById("rwipublictextSize").firstChild.nodeValue=rwipublictext;
document.getElementById("rwipublictextSegmentCount").firstChild.nodeValue=rwipublictextSegmentCount;
postprocessing=getFirstChild(statusTag, "postprocessing");
refreshRunningCrawls(statusTag);
var postprocessing = getFirstChild(statusTag, "postprocessing");
document.getElementById("postprocessing_status").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "status"));
document.getElementById("postprocessing_collection").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "collectionRemainingCount"));
document.getElementById("postprocessing_webgraph").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "webgraphRemainingCount"));
document.getElementById("postprocessing_remainingTimeMinutes").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeMinutes"));
document.getElementById("postprocessing_remainingTimeSeconds").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "remainingTimeSeconds"));
postprocessingElapsedTime=parseInt(getValue(getFirstChild(postprocessing, "ElapsedTime")));
postprocessingRemainingTime=parseInt(getValue(getFirstChild(postprocessing, "RemainingTime")));
p = 100 * postprocessingElapsedTime / (postprocessingElapsedTime + postprocessingRemainingTime) || 0;
var postprocessingElapsedTime = parseInt(getValue(getFirstChild(postprocessing, "ElapsedTime")));
var postprocessingRemainingTime = parseInt(getValue(getFirstChild(postprocessing, "RemainingTime")));
var p = 100 * postprocessingElapsedTime / (postprocessingElapsedTime + postprocessingRemainingTime) || 0;
document.getElementById("postprocessing_bar").firstChild.setAttribute("value", p);
//document.getElementById("postprocessing_speed").firstChild.nodeValue=getValue(getFirstChild(postprocessing, "speed"));
load=getFirstChild(statusTag, "load");
var load = getFirstChild(statusTag, "load");
document.getElementById("load").firstChild.nodeValue=getValue(load);
loaderqueue=getFirstChild(statusTag, "loaderqueue");
loaderqueue_size=getValue(getFirstChild(loaderqueue, "size"));
loaderqueue_max=getValue(getFirstChild(loaderqueue, "max"));
var loaderqueue = getFirstChild(statusTag, "loaderqueue");
var loaderqueue_size = getValue(getFirstChild(loaderqueue, "size"));
var loaderqueue_max = getValue(getFirstChild(loaderqueue, "max"));
document.getElementById("loaderqueuesize").firstChild.nodeValue=loaderqueue_size;
document.getElementById("loaderqueuemax").firstChild.nodeValue=loaderqueue_max;
localcrawlerqueue=getFirstChild(statusTag, "localcrawlerqueue");
localcrawlerqueue_size=getValue(getFirstChild(localcrawlerqueue, "size"));
localcrawlerqueue_state=getValue(getFirstChild(localcrawlerqueue, "state"));
var localcrawlerqueue = getFirstChild(statusTag, "localcrawlerqueue");
var localcrawlerqueue_size = getValue(getFirstChild(localcrawlerqueue, "size"));
var localcrawlerqueue_state = getValue(getFirstChild(localcrawlerqueue, "state"));
document.getElementById("localcrawlerqueuesize").firstChild.nodeValue=localcrawlerqueue_size;
putQueueState("localcrawler", localcrawlerqueue_state);
limitcrawlerqueue=getFirstChild(statusTag, "limitcrawlerqueue");
limitcrawlerqueue_size=getValue(getFirstChild(limitcrawlerqueue, "size"));
limitcrawlerqueue_state=getValue(getFirstChild(limitcrawlerqueue, "state"));
var limitcrawlerqueue = getFirstChild(statusTag, "limitcrawlerqueue");
var limitcrawlerqueue_size = getValue(getFirstChild(limitcrawlerqueue, "size"));
var limitcrawlerqueue_state = getValue(getFirstChild(limitcrawlerqueue, "state"));
document.getElementById("limitcrawlerqueuesize").firstChild.nodeValue=limitcrawlerqueue_size;
putQueueState("limitcrawler", limitcrawlerqueue_state);
remotecrawlerqueue=getFirstChild(statusTag, "remotecrawlerqueue");
remotecrawlerqueue_size=getValue(getFirstChild(remotecrawlerqueue, "size"));
remotecrawlerqueue_state=getValue(getFirstChild(remotecrawlerqueue, "state"));
var remotecrawlerqueue = getFirstChild(statusTag, "remotecrawlerqueue");
var remotecrawlerqueue_size = getValue(getFirstChild(remotecrawlerqueue, "size"));
var remotecrawlerqueue_state = getValue(getFirstChild(remotecrawlerqueue, "state"));
document.getElementById("remotecrawlerqueuesize").firstChild.nodeValue=remotecrawlerqueue_size;
putQueueState("remotecrawler", remotecrawlerqueue_state);
noloadcrawlerqueue=getFirstChild(statusTag, "noloadcrawlerqueue");
noloadcrawlerqueue_size=getValue(getFirstChild(noloadcrawlerqueue, "size"));
noloadcrawlerqueue_state=getValue(getFirstChild(noloadcrawlerqueue, "state"));
var noloadcrawlerqueue = getFirstChild(statusTag, "noloadcrawlerqueue");
var noloadcrawlerqueue_size = getValue(getFirstChild(noloadcrawlerqueue, "size"));
var noloadcrawlerqueue_state = getValue(getFirstChild(noloadcrawlerqueue, "state"));
document.getElementById("noloadcrawlerqueuesize").firstChild.nodeValue=noloadcrawlerqueue_size;
putQueueState("noloadcrawler", noloadcrawlerqueue_state);
statusLoaded=true;
}
/**
 * Insert a new crawl line at the end of the running crawls table.
 * The row gets the crawl handle as id and contains the name cell, the count
 * cell (only when debug is enabled) and the status cell. The status cell holds
 * the "Running" label and the terminate form only when status is "alive";
 * otherwise it is left empty.
 * @param table crawls table HTML DOM node
 * @param crawl crawl profile node from status_p.xml
 * @param handle {String} identifier of the running crawl profile
 * @param status {String} running status of the crawl profile
 */
function insertCrawlRaw(table, crawl, handle, status) {
    /* Append the row; the index is given explicitly as some older browsers require it */
    var newRow = table.insertRow(-1);
    /* The new row is now the last one. Use its zero-based position among the data rows
       so that the first data row is light, the same parity rule as refreshRowsStyle() */
    var dataRowIndex = table.rows.length - 1 - crawlsHeadLength;
    newRow.className = (dataRowIndex % 2) == 0 ? "TableCellLight" : "TableCellDark";
    newRow.id = handle;

    /* Name cell */
    var nameCell = newRow.insertCell(-1);
    nameCell.textContent = getValue(getFirstChild(crawl, "name"));

    if(debug) {
        /* Count cell, only when debug is enabled */
        var countCell = newRow.insertCell(-1);
        countCell.textContent = getValue(getFirstChild(crawl, "count"));
    }

    /* Status cell */
    var statusCell = newRow.insertCell(-1);
    statusCell.id = handle + "_status_cell";
    if(status != "alive") {
        return;
    }

    var statusDiv = document.createElement("div");
    statusDiv.id = handle + "_status";
    /* Set the inline style through cssText rather than assigning a string to the style object */
    statusDiv.style.cssText = "text-decoration:blink;float:left;";
    statusDiv.textContent = "Running";
    statusCell.appendChild(statusDiv);

    var terminateForm = document.createElement("form");
    terminateForm.id = handle + "_terminate";
    terminateForm.style.cssText = "float:left;";
    terminateForm.action = "Crawler_p.html";
    terminateForm.method = "get";
    terminateForm.enctype = "multipart/form-data";
    /* form["accept-charset"] would only create an unrelated JS property:
       the attribute has to be set explicitly */
    terminateForm.setAttribute("accept-charset", "UTF-8");

    var formContent = document.createElement("div");

    var handleInput = document.createElement("input");
    handleInput.type = "hidden";
    handleInput.name = "handle";
    handleInput.value = handle;
    formContent.appendChild(handleInput);

    var terminateButton = document.createElement("input");
    terminateButton.type = "submit";
    terminateButton.name = "terminate";
    terminateButton.value = "Terminate";
    terminateButton.className = "btn btn-danger btn-xs";
    formContent.appendChild(terminateButton);

    terminateForm.appendChild(formContent);
    statusCell.appendChild(terminateForm);
}
/**
 * Update the status label of a crawl and drop its terminate form.
 * Elements are looked up by id (handle + "_status" and handle + "_terminate");
 * any that is missing is simply skipped.
 * @param handle {String} identifier of the crawl profile, used as id prefix
 * @param status current crawl status label
 */
function refreshStatusCell(handle, status) {
    var statusLabel = document.getElementById(handle + "_status");
    if (statusLabel != null) {
        statusLabel.textContent = status;
    }

    var form = document.getElementById(handle + "_terminate");
    if (form == null) {
        return;
    }
    /* Only a form still attached to a parent element can be removed */
    var formParent = form.parentElement;
    if (formParent) {
        formParent.removeChild(form);
    }
}
/**
 * Write the current number of running crawls into the legend.
 * Nothing is changed when the count attribute is missing or empty, or when
 * there is no legend element.
 * @param legend the HTML DOM legend element
 * @param crawls crawls node from xml api status_p.xml
 */
function refreshCrawlsLegend(legend, crawls) {
    var runningCount = crawls.getAttribute("count");
    if (!runningCount || legend == null) {
        return;
    }
    legend.textContent = "Running Crawls (" + runningCount + ")";
}
/**
 * Refresh dark/light rows style: data rows alternate between the
 * "TableCellLight" and "TableCellDark" classes, starting with light.
 * Header rows are left untouched.
 * @param table running crawls table
 * @param headLength {Number} number of header rows at the beginning of table.rows
 */
function refreshRowsStyle(table, headLength) {
    /* Declared locally: the row variable must not leak as an implicit global */
    var row;
    for(var i = headLength; i < table.rows.length; i++) {
        row = table.rows[i];
        row.className = ((i - headLength) % 2) == 0 ? "TableCellLight" : "TableCellDark";
    }
}
/**
 * Refresh running crawls table from the latest status:
 * - crawls with no row yet are appended,
 * - crawls which are no longer "alive" get their status cell updated,
 * - rows whose id matches no crawl handle of the status are removed.
 * The legend count and the dark/light rows alternation are refreshed once,
 * only when rows were added or removed.
 *
 * @param statusTag
 *            status tag from xml api status_p.xml
 */
function refreshRunningCrawls(statusTag) {
    var crawls = getFirstChild(statusTag, "crawls");
    /* crawls node should be present even when no crawl is running.
       Nothing to do either when the table is not in the page. */
    if(crawls == null || crawlsTable == null || crawlsTable.rows == null) {
        return;
    }

    var processedHandles = {};
    var tableChanged = false;
    /* status is a local on purpose: as an undeclared name it resolves to window.status
       in a browser, which would be overwritten and coerce the value to a string */
    var crawlNode, handle, status;

    /* Loop on crawl node elements from xml */
    for(crawlNode = getFirstChild(crawls, "crawl"); crawlNode; crawlNode = getNextSibling(crawlNode, "crawl")) {
        handle = getValue(getFirstChild(crawlNode, "handle"));
        if(handle == null) {
            continue;
        }
        processedHandles[handle] = crawlNode;
        status = getValue(getFirstChild(crawlNode, "status"));
        /* A crawl already displayed has a status cell with an id prefixed by its handle */
        if(document.getElementById(handle + "_status_cell") == null) {
            insertCrawlRaw(crawlsTable, crawlNode, handle, status);
            tableChanged = true;
        } else if(status != "alive") {
            refreshStatusCell(handle, status);
        }
    }

    /* Collect rows to delete first: removing while iterating would shift the rows indexes */
    var row, rowsToDelete = [], i;
    for(i = crawlsHeadLength; i < crawlsTable.rows.length; i++) {
        row = crawlsTable.rows[i];
        /* Own property check, so that a row id can never match an inherited Object member */
        if(!Object.prototype.hasOwnProperty.call(processedHandles, row.id)) {
            rowsToDelete.push(row);
        }
    }

    /* Delete rows */
    for(i = 0; i < rowsToDelete.length; i++) {
        row = rowsToDelete[i];
        row.parentElement.removeChild(row);
        tableChanged = true;
    }

    /* Refresh legend and rows style (dark/light alternate) */
    if(tableChanged) {
        refreshCrawlsLegend(runningCrawlsLegend, crawls);
        refreshRowsStyle(crawlsTable, crawlsHeadLength);
    }
}
function putQueueState(queue, state) {
a = document.getElementById(queue + "stateA");
img = document.getElementById(queue + "stateIMG");

Loading…
Cancel
Save