|
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
|
|
<head>
|
|
|
|
<title>YaCy '#[clientname]#': Crawler</title>
|
|
|
|
#%env/templates/metas.template%#
|
|
|
|
<script type="text/javascript" src="/js/ajax.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/xml.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/html.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/rss2.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/query.js"></script>
|
|
|
|
<script type="text/javascript" src="/js/Crawler.js"></script>
|
|
|
|
</head>
|
|
|
|
<body id="Crawler" onload="initCrawler();">
<!-- initCrawler() is not defined in this template; presumably it is provided by /js/Crawler.js, which is loaded in the head (TODO confirm). -->
|
|
|
|
|
|
|
|
<div id="api">
|
|
|
|
<a href="/api/status_p.xml" id="apilink"><img src="/env/grafics/api.png" width="60" height="40" alt="API"/></a>
|
|
|
|
<span>Click on this API button to see an XML with information about the crawler status</span>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
#%env/templates/header.template%#
|
|
|
|
#%env/templates/submenuCrawlMonitor.template%#
|
|
|
|
<h2>Crawler</h2>
|
|
|
|
<noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
|
|
|
|
<!-- Queues panel: one row per crawler queue with its size and a pause/resume link.
     The sizes are placeholders substituted by the server-side template. The empty href/src/alt
     attributes on the state links and images are presumably filled in by the page script
     through their ids (TODO confirm against /js/Crawler.js). -->
<fieldset style="width:180px;height:190px;float:left;">
|
|
|
|
<legend>Queues</legend>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
|
|
|
|
<tbody>
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<th width="110">Queue<br/> </th>
|
|
|
|
<th>Size<br/> </th>
|
|
|
|
<th width="50">Pause/<br/>Resume</th>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Local Crawler</td>
|
|
|
|
<td align="right"><span id="localcrawlerqueuesize">#[localCrawlSize]#</span></td>
|
|
|
|
<td>
|
|
|
|
<!-- Same attribute set as the limit/remote/no-load state links below, so all four can be handled alike. -->
<a href="" title="" id="localcrawlerstateA">
|
|
|
|
<img src="" alt="" style="width:12px; height:12px;" id="localcrawlerstateIMG" />
|
|
|
|
</a>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Limit Crawler</td>
|
|
|
|
<td align="right"><span id="limitcrawlerqueuesize">#[limitCrawlSize]#</span></td>
|
|
|
|
<td>
|
|
|
|
<a href="" title="" id="limitcrawlerstateA">
|
|
|
|
<img src="" alt="" style="width:12px; height:12px;" id="limitcrawlerstateIMG" />
|
|
|
|
</a>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Remote Crawler</td>
|
|
|
|
<td align="right"><span id="remotecrawlerqueuesize">#[remoteCrawlSize]#</span></td>
|
|
|
|
<td>
|
|
|
|
<a href="" title="" id="remotecrawlerstateA">
|
|
|
|
<img src="" alt="" style="width:12px; height:12px;" id="remotecrawlerstateIMG" />
|
|
|
|
</a>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">No-Load Crawler</td>
|
|
|
|
<td align="right"><span id="noloadcrawlerqueuesize">#[noloadCrawlSize]#</span></td>
|
|
|
|
<td>
|
|
|
|
<a href="" title="" id="noloadcrawlerstateA">
|
|
|
|
<img src="" alt="" style="width:12px; height:12px;" id="noloadcrawlerstateIMG" />
|
|
|
|
</a>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Loader (<span id="loaderqueuemax">#[loaderMax]#</span>)</td>
|
|
|
|
<td align="right"><span id="loaderqueuesize">#[loaderSize]#</span></td>
|
|
|
|
<td> </td>
|
|
|
|
</tr>
|
|
|
|
</tbody>
|
|
|
|
</table>
|
|
|
|
<!-- Warning text for the queues, substituted by the server-side template. -->
<div class="warning" id="message"> #[queuemessage]#</div>
|
|
|
|
</fieldset>
|
|
|
|
<!-- Index Size panel: entry and segment counts per index. The values are placeholders
     substituted by the server-side template; the span ids presumably allow the page script
     to refresh them later (TODO confirm). -->
<fieldset style="width:270px;height:190px;float:left;">
|
|
|
|
<legend>Index Size</legend>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
|
|
|
|
<tbody>
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<th width="160">Database<br/> </th>
|
|
|
|
<th width="80">Entries<br/> </th>
|
|
|
|
<th width="40">Seg-<br/>ments</th>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Documents<br/><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
|
|
|
|
<td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
|
|
|
|
<td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Webgraph Edges<br/><a href="#[webgraphSolrURL]#">solr search api</a></td>
|
|
|
|
<td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
|
|
|
|
<td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Citations<br/>(reverse link index)</td>
|
|
|
|
<td align="right"><span id="citationSize">#[citationSize]#</span></td>
|
|
|
|
<td align="right"><span id="citationSegmentCount">#[citationSegmentCount]#</span></td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">RWIs<br/>(P2P Chunks)</td>
|
|
|
|
<td align="right"><span id="rwipublictextSize">#[rwipublictextSize]#</span></td>
|
|
|
|
<td align="right"><span id="rwipublictextSegmentCount">#[rwipublictextSegmentCount]#</span></td>
|
|
|
|
</tr>
|
|
|
|
</tbody>
|
|
|
|
</table>
|
|
|
|
</fieldset>
|
|
|
|
<!-- Progress panel: crawl speed settings form plus indicator rows addressed by id. -->
<fieldset style="width:480px;height:190px;float:left;">
|
|
|
|
<legend>Progress</legend>
|
|
|
|
<form action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
|
|
|
|
<tbody>
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<th width="120">Indicator<br/> </th>
|
|
|
|
<th width="300" colspan="4">Level<br/> </th>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Speed / PPM<br/>(Pages Per Minute)</td>
|
|
|
|
<td align="left" colspan="4">
|
|
|
|
<input id="customPPM" name="customPPM" type="number" min="10" max="30000" style="width:46px" value="#[customPPMdefault]#" />PPM
|
|
|
|
<input id="latencyFactor" name="latencyFactor" type="number" min="0.1" max="3.0" step="0.1" style="width:32px" value="#[latencyFactorDefault]#" />LF
|
|
|
|
<input id="MaxSameHostInQueue" name="MaxSameHostInQueue" type="number" min="1" max="30" style="width:32px" value="#[MaxSameHostInQueueDefault]#" />MH
|
|
|
|
<input type="submit" name="crawlingPerformance" value="set" />
|
|
|
|
(<a href="/Crawler_p.html?crawlingPerformance=minimum">min</a>/<a href="/Crawler_p.html?crawlingPerformance=maximum">max</a>)
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Crawler PPM</td>
|
|
|
|
<td align="left" width="40"><span id="ppmNum"> </span></td>
|
|
|
|
<td align="left" width="260" colspan="3">
|
|
|
|
<!-- progress is not an empty element, so it needs an explicit end tag to be closed by text/html parsers. -->
<progress id="ppmbar" max="30000" value="0" style="width:94%;"></progress>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left" valign="top" rowspan="2">Postprocessing Progress <span id="postprocessing_speed"> </span></td>
|
|
|
|
<td align="left" width="40"><span id="postprocessing_status"> </span></td>
|
|
|
|
<td align="left" width="260" colspan="3">
|
|
|
|
<!-- progress is not an empty element, so it needs an explicit end tag to be closed by text/html parsers. -->
<span id="postprocessing_bar"><progress id="postprocessingBar" max="30000" value="0" style="width:94%;"></progress></span>
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left"><span id="postprocessing_remainingTimeMinutes">0</span>:<span id="postprocessing_remainingTimeSeconds">0</span></td>
|
|
|
|
<td align="left"><span id="postprocessing_collection"> </span></td>
|
|
|
|
<td align="left"><span id="postprocessing_webgraph"> </span></td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Traffic (Crawler)</td>
|
|
|
|
<td align="left"><span id="trafficCrawler"> </span> MB</td>
|
|
|
|
<td colspan="3"> </td>
|
|
|
|
</tr>
|
|
|
|
<tr class="TableCellLight">
|
|
|
|
<td align="left">Load</td>
|
|
|
|
<td align="left"><span id="load"> </span></td>
|
|
|
|
<td colspan="3"> </td>
|
|
|
|
</tr>
|
|
|
|
</tbody>
|
|
|
|
</table>
|
|
|
|
</form>
|
|
|
|
|
|
|
|
</fieldset>
|
|
|
|
<p class="watchCrawler" style="clear:both;">
|
|
|
|
#(info)#
|
|
|
|
<!-- 0 -->
|
|
|
|
::
|
|
|
|
<!-- 1 -->
|
|
|
|
Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db
|
|
|
|
and restart. ::
|
|
|
|
<!-- 2 -->
|
|
|
|
Error: #[errmsg]# ::
|
|
|
|
<!-- 3 -->
|
|
|
|
Application not yet initialized. Sorry. Please wait some seconds and repeat
|
|
|
|
the request. ::
|
|
|
|
<!-- 4 -->
|
|
|
|
<strong>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with
|
|
|
|
crawl root "#[crawlingStart]#".</strong> Please try again with different
|
|
|
|
filter. ::
|
|
|
|
<!-- 5 -->
|
|
|
|
Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br />
|
|
|
|
::
|
|
|
|
<!-- 6 -->
|
|
|
|
Error with URL input "#[crawlingStart]#": #[error]# ::
|
|
|
|
<!-- 7 -->
|
|
|
|
Error with file input "#[crawlingStart]#": #[error]# ::
|
|
|
|
<!-- 8 -->
|
|
|
|
Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds,
|
|
|
|
it may take some seconds until the first result appears there.</strong>
|
|
|
|
If you crawl any un-wanted pages, you can delete them <a href="IndexCreateQueues_p.html?stack=LOCAL">here</a>.<br />
|
|
|
|
#(/info)#
|
|
|
|
</p>
|
|
|
|
<!-- crawl queues -->
|
|
|
|
|
|
|
|
<!-- crawl profile list -->
|
|
|
|
#(crawlProfilesShow)#::
|
|
|
|
<fieldset>
|
|
|
|
<legend>Running Crawls (#[count]#)</legend>
|
|
|
|
<table border="0" cellpadding="2" cellspacing="1" summary="A list of crawl profiles and their current settings.">
|
|
|
|
<colgroup>
|
|
|
|
<col width="16" />
|
|
|
|
<col width="140"/>
|
|
|
|
</colgroup>
|
|
|
|
<tr class="TableHeader">
|
|
|
|
<td><strong>Name</strong></td>
|
|
|
|
#(debug)#::<td><strong>Count</strong></td>#(/debug)#
|
|
|
|
<td><strong>Status</strong></td>
|
|
|
|
</tr>
|
|
|
|
#{list}#
|
|
|
|
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
|
|
|
|
<td>#[name]#</td>
|
|
|
|
#(debug)#::<td>#[count]#</td>#(/debug)#
|
|
|
|
<td>#(terminateButton)#::
|
|
|
|
<div style="text-decoration:blink;float:left;">Running</div>
|
|
|
|
<form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>
|
|
|
|
<input type="hidden" name="handle" value="#[handle]#" />
|
|
|
|
<input type="submit" name="terminate" value="Terminate" />
|
|
|
|
</div></form>
|
|
|
|
#(/terminateButton)#
|
|
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
#{/list}#
|
|
|
|
</table>
|
|
|
|
#(linkstructure)#::
|
|
|
|
<script type="text/javascript">
<!--
/*
 * Web structure picture animation.
 *
 * Repeatedly re-requests the image with id "WebPicture", appending an
 * increasing idx parameter so each request uses a new URL. The delay
 * between requests starts at 500 ms and grows by a factor of 1.2 per step;
 * once a delay exceeds 3000 ms the following delay is set back to 3000 ms.
 * The loop stops as soon as the image element is no longer in the document.
 *
 * imagestub, idx and doanimation stay global, as in the original script.
 *
 * NOTE(review): the host value is written into a JavaScript string literal
 * by the server-side template; confirm that it is escaped for that context.
 */
var imagestub = "WebStructurePicture_p.png?host=#[hosts]#&depth=4&width=1024&height=512&nodes=600&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333";
var idx = 0;

// Pass functions to setTimeout instead of code strings, so nothing is evaluated from text.
setTimeout(function () { doanimation(500); }, 500);

/*
 * Loads the next frame and schedules the following one.
 * nexttimeout: delay in milliseconds before the next frame is requested.
 */
function doanimation(nexttimeout) {
  var accessPicture = document.getElementById("WebPicture");
  if (accessPicture != null) {
    idx++;
    accessPicture.src = imagestub + "&idx=" + idx;
    var followingTimeout = nexttimeout > 3000 ? 3000 : nexttimeout * 1.2;
    setTimeout(function () { doanimation(followingTimeout); }, nexttimeout);
  }
}
-->
</script>
|
|
|
|
<div style="clear:both; text-align:left;">
|
|
|
|
<!-- Placeholder image; the inline script above swaps its src for the web structure picture. -->
<img id="WebPicture" src="/env/grafics/invisible.png" alt="Web structure picture"/>
|
|
|
|
</div>
|
|
|
|
#(/linkstructure)#
|
|
|
|
<h3>Crawled Pages</h3>
|
|
|
|
<p id="crawllist"></p>
|
|
|
|
</fieldset>
|
|
|
|
#(/crawlProfilesShow)#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#%env/templates/footer.template%#
|
|
|
|
</body>
|
|
|
|
</html>
|