<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <!-- Page title; the client name placeholder is substituted by the server-side template engine. -->
  <title>YaCy '#[clientname]#': Watch Crawler</title>
  <!-- Include of env/templates/metas.template (shared head content, judging by its name). -->
  #%env/templates/metas.template%#
  <!-- Scripts for this page. WatchCrawler.js comes last; presumably it relies on the three helper files before it (TODO confirm). -->
  <script type="text/javascript" src="/js/ajax.js"></script>
  <script type="text/javascript" src="/js/xml.js"></script>
  <script type="text/javascript" src="/js/html.js"></script>
  <script type="text/javascript" src="/js/WatchCrawler.js"></script>
</head>
<body id="watchCrawler"> #%env/templates/header.template%#
<h2>Crawler Monitor</h2>
<!--
  Refresh countdown. The nextUpdate span is empty in the markup, so its number has to be
  written by script. changeInterval() is not defined in this file; presumably it comes from
  one of the script files loaded in the head (TODO confirm). The image keeps its name
  attribute because scripts may look it up by that name (TODO confirm).
-->
<p> Next update in <span id="nextUpdate" onclick="changeInterval()"></span> seconds. <img src="/env/grafics/empty.gif" name="ajax" alt="empty"/>
</p>
<!--
  Queue overview: one row per queue with its current size and maximum.
  The spans, links and images carrying an id start out blank; presumably the page
  scripts fill them in on each refresh (TODO confirm against WatchCrawler.js).
-->
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
  <tbody>
    <tr class="TableHeader">
      <th>Queue</th>
      <th>Size</th>
      <th> </th>
      <th>Max</th>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Indexing</td>
      <td align="right"><span id="indexingqueuesize"> </span></td>
      <td> </td>
      <td align="right"><span id="indexingqueuemax"> </span></td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Loader</td>
      <td align="right"><span id="loaderqueuesize"> </span></td>
      <td> </td>
      <td align="right"><span id="loaderqueuemax"> </span></td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Local Crawler</td>
      <td align="right"><span id="localcrawlerqueuesize"> </span></td>
      <!-- State link and icon: href and src are empty here and are expected to be set by script. -->
      <td>
        <a href="" id="localcrawlerstateA">
          <img src="" alt="" style="width:12px; height:12px;" id="localcrawlerstateIMG" />
        </a>
      </td>
      <td align="right">unlimited</td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Remote Crawler</td>
      <td align="right"><span id="remotecrawlerqueuesize"> </span></td>
      <td>
        <a href="" title="" id="remotecrawlerstateA">
          <img src="" alt="" style="width:12px; height:12px;" id="remotecrawlerstateIMG" />
        </a>
      </td>
      <td align="right">unlimited</td>
    </tr>
  </tbody>
</table>
<!--
  Crawl speed selector. All three submit buttons share the name crawlingPerformance, so the
  server can tell them apart by the submitted value (minimum, custom, maximum); the custom
  button is accompanied by the customPPM text field.
  The form encloses the table: a form element is not permitted as a direct child of a
  table, while a table is valid block content of a form.
  The template switches inside the td start tags add class="TableCellDark" to a cell in
  their second alternative; the first alternative is empty.
-->
<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
  <table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
    <tbody>
      <tr class="TableHeader">
        <th colspan="3">Speed</th>
      </tr>
      <tr class="TableCellLight">
        <td align="left" #(crawlingSpeedMinChecked)#::class="TableCellDark"#(/crawlingSpeedMinChecked)#><input type="submit" name="crawlingPerformance" value="minimum" /></td>
        <td align="left" #(crawlingSpeedCustChecked)#::class="TableCellDark"#(/crawlingSpeedCustChecked)#><input name="customPPM" type="text" size="4" maxlength="4" value="#[customPPMdefault]#" />PPM <input type="submit" name="crawlingPerformance" value="custom" /></td>
        <td align="left" #(crawlingSpeedMaxChecked)#::class="TableCellDark"#(/crawlingSpeedMaxChecked)#><input type="submit" name="crawlingPerformance" value="maximum" /></td>
      </tr>
    </tbody>
  </table>
</form>
<!--
  Database sizes. Both value spans are blank in the markup; presumably they are
  filled in by the page scripts (TODO confirm).
-->
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
  <tbody>
    <tr class="TableHeader">
      <th>Database</th>
      <th>Entries</th>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Pages (URLs)</td>
      <td align="right"><span id="urldbsize"> </span></td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">RWIs (Words)</td>
      <td align="right"><span id="rwidbsize"> </span></td>
    </tr>
  </tbody>
</table>
<!--
  Performance indicators. Each indicator has a Num span and, for PPM and the word cache,
  a second Span cell. All are blank in the markup and are presumably written by the
  page scripts (TODO confirm what the second cell displays).
-->
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
  <tbody>
    <tr class="TableHeader">
      <th>Indicator</th>
      <th colspan="2">Level</th>
    </tr>
    <tr class="TableCellLight">
      <td align="left">PPM (Pages Per Minute)</td>
      <td align="left"><span id="ppmNum"> </span></td>
      <td align="left"><span id="ppmSpan"> </span></td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">Traffic (Crawler)</td>
      <td align="left"><span id="trafficCrawler"> </span> MB</td>
      <td> </td>
    </tr>
    <tr class="TableCellLight">
      <td align="left">RWI RAM (Word Cache)</td>
      <td align="left"><span id="wordcacheNum"> </span></td>
      <td align="left"><span id="wordcacheSpan"> </span></td>
    </tr>
  </tbody>
</table>
<!--
  Status message area. The info switch inside the paragraph holds nine numbered
  alternatives (0 to 8), separated by a double colon; the server-side template engine
  selects one of them. Alternative 0 is intentionally empty.
-->
<p class="watchCrawler"> #(info)#
  <!-- 0: no message -->
  ::
  <!-- 1: crawl profile database problem -->
  Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db
  and restart. ::
  <!-- 2: generic error with a server-supplied message -->
  Error: #[errmsg]# ::
  <!-- 3: application still starting up -->
  Application not yet initialized. Sorry. Please wait some seconds and repeat
  the request. ::
  <!-- 4: crawl filter does not match the start URL -->
  <strong>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with
  crawl root "#[crawlingStart]#".</strong> Please try again with different
  filter. ::
  <!-- 5: crawl start rejected, with reason -->
  Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br />
  ::
  <!-- 6: invalid URL input -->
  Error with URL input "#[crawlingStart]#": #[error]# ::
  <!-- 7: invalid file input -->
  Error with file input "#[crawlingStart]#": #[error]# ::
  <!-- 8: crawl started successfully -->
  Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds,
  it may take some seconds until the first result appears there.</strong>
  If you crawl any un-wanted pages, you can delete them <a href="IndexCreateWWWLocalQueue_p.html">here</a>.<br />
  #(/info)# </p>
<!--
  Crawl queues. The table ships with its header row only; the data rows are presumably
  appended to queueTable by the page scripts (TODO confirm against WatchCrawler.js).
-->
<p id="crawlingQueues"><strong>Crawl Queue:</strong></p>
<table border="0" cellpadding="2" cellspacing="1" id="queueTable">
  <tbody>
    <tr class="TableHeader">
      <th>Queue</th>
      <th>Profile</th>
      <th>Initiator</th>
      <th>Depth</th>
      <th>Modified Date</th>
      <th>Anchor Name</th>
      <th>URL</th>
      <th>Size</th>
      <th>Delete</th>
    </tr>
  </tbody>
</table>
<!--
  Crawl profile list. The crawlProfiles template loop emits one row per profile, and the
  dark switch in the row class alternates between TableCellLight and TableCellDark.
  The last column holds a per-profile delete form; the deleteButton switch leaves the
  cell empty in its first alternative. Deleting changes server state, so the form uses
  post, like the Speed form that targets the same action.
-->
<p id="crawlingProfiles"><strong>Crawl Profiles:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
  <!-- Column hints: 14 columns in total, matching the header and the data rows. -->
  <colgroup>
    <col width="120" />
    <col />
    <col width="16" />
    <col width="60" />
    <col width="10" span="2" />
    <col />
    <col width="10" span="6" />
    <col />
  </colgroup>
  <tr class="TableHeader">
    <th>Crawl Thread</th>
    <th>Start URL</th>
    <th>Depth</th>
    <th>Filter</th>
    <th>MaxAge</th>
    <th>Auto Filter Depth</th>
    <th>Auto Filter Content</th>
    <th>Max Page Per Domain</th>
    <th>Accept '?' URLs</th>
    <th>Fill Proxy Cache</th>
    <th>Local Text Indexing</th>
    <th>Local Media Indexing</th>
    <th>Remote Indexing</th>
    <th></th>
  </tr>
  #{crawlProfiles}#
  <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
    <td>#[name]#</td>
    <td><a href="#[startURL]#">#[startURL]#</a></td>
    <td>#[depth]#</td>
    <td>#[filter]#</td>
    <td>#[crawlingIfOlder]#</td>
    <td>#[crawlingDomFilterDepth]#</td>
    <td>#{crawlingDomFilterContent}##[item]#<br />#{/crawlingDomFilterContent}#</td>
    <td>#[crawlingDomMaxPages]#</td>
    <td>#(withQuery)#no::yes#(/withQuery)#</td>
    <td>#(storeCache)#no::yes#(/storeCache)#</td>
    <td>#(indexText)#no::yes#(/indexText)#</td>
    <td>#(indexMedia)#no::yes#(/indexMedia)#</td>
    <td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
    <td>#(deleteButton)#::
      <form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
        <div>
          <input type="hidden" name="handle" value="#[handle]#" />
          <input type="submit" name="deleteprofile" value="Delete" />
        </div>
      </form>
      #(/deleteButton)#
    </td>
  </tr>
  #{/crawlProfiles}#
</table>
<!-- Include of env/templates/footer.template (shared page footer, judging by its name). -->
#%env/templates/footer.template%#
</body>
</html>