redesign of the crawler monitor page: show crawled pages instead of queue of urls that shall be crawled
Michael Peter Christen 13 years ago
parent 8b974905ee
commit c846e9ca14
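
The change wires the monitor page into the peer's RSS event feed: each refresh cycle in Crawler.js now also requests /api/feed.xml for the REMOTEINDEXING and LOCALINDEXING channels, and the new showRSS() callback renders the returned items into the #crawllist paragraph, replacing the old iframe of queued URLs. A minimal sketch of that poll-and-render cycle, using fetch/DOMParser in place of the shipped ajax.js/rss2.js helpers (pollCrawledPages and its rendering details are illustrative, not code from this commit):

    // Sketch: poll the peer's event feed and list recently indexed pages.
    var FEED_URL = "/api/feed.xml?count=20&set=REMOTEINDEXING,LOCALINDEXING";

    function pollCrawledPages() {
        // cache-bust with a timestamp, as the committed refresh() does
        fetch(FEED_URL + "&time=" + Date.now())
            .then(function (resp) { return resp.text(); })
            .then(function (xml) {
                var items = new DOMParser()
                    .parseFromString(xml, "text/xml")
                    .getElementsByTagName("item");
                var target = document.getElementById("crawllist");
                if (target === null) return;
                target.textContent = "";                     // clear previous batch
                for (var i = 0; i < items.length; i++) {
                    var text = items[i].getElementsByTagName("description")[0];
                    var link = items[i].getElementsByTagName("link")[0];
                    var a = document.createElement("a");
                    a.href = link ? link.textContent : "#";
                    a.textContent = text ? text.textContent : "(untitled)";
                    target.appendChild(a);
                    target.appendChild(document.createElement("br"));
                }
            });
        setTimeout(pollCrawledPages, 2000);                  // matches the new refreshInterval = 2
    }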

@@ -1,27 +1,21 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Crawler Queues</title>
<title>YaCy '#[clientname]#': Crawler</title>
#%env/templates/metas.template%#
<script type="text/javascript" src="/js/ajax.js"></script>
<script type="text/javascript" src="/js/xml.js"></script>
<script type="text/javascript" src="/js/html.js"></script>
<script type="text/javascript" src="/js/rss2.js"></script>
<script type="text/javascript" src="/js/query.js"></script>
<script type="text/javascript" src="/js/Crawler.js"></script>
<script type="text/javascript">
function refreshiframe()
{
var f = document.getElementById('QueuesTable');
f.contentWindow.location.reload(true);
setTimeout("refreshiframe()", 2000);
}
</script>
</head>
<body id="Crawler" onload="initCrawler();refreshiframe();">
<body id="Crawler" onload="initCrawler();">
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>Crawler Queues</h2>
<h2>Crawler</h2>
<noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
<fieldset style="width:240px;height:130px;float:left;">
<fieldset style="width:220px;height:140px;float:left;">
<legend>Queues</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody>
@@ -74,7 +68,7 @@
</tbody>
</table>
</fieldset>
<fieldset style="width:160px;height:130px;float:left;">
<fieldset style="width:140px;height:140px;float:left;">
<legend>Index Size</legend>
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody>
@@ -93,13 +87,13 @@
</tbody>
</table>
</fieldset>
<fieldset style="width:440px;height:130px;;float:left;">
<fieldset style="width:520px;height:140px;;float:left;">
<legend>Progress</legend>
<form action="Crawler_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<table border="0" cellpadding="2" cellspacing="1" class="watchCrawler">
<tbody>
<tr class="TableHeader">
<th>Indicator</th>
<th width="80px">Indicator</th>
<th colspan="2">Level</th>
</tr>
<tr class="TableCellLight">
@@ -113,23 +107,20 @@
<tr class="TableCellLight">
<td align="left">PPM (Pages Per Minute)</td>
<td align="left" width="20"><span id="ppmNum">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left"><span id="ppmSpan">&nbsp;&nbsp;&nbsp;</span></td>
<td align="left" width="400px"><span id="ppmSpan">&nbsp;&nbsp;&nbsp;</span></td>
</tr>
<tr class="TableCellLight">
<td align="left">Traffic (Crawler)</td>
<td align="left"><span id="trafficCrawler">&nbsp;&nbsp;&nbsp;</span> MB</td>
<td>&nbsp;</td>
</tr>
<tr class="TableCellLight">
<td align="left">RWI RAM (Word Cache)</td>
<td align="left"><span id="wordcacheNum">&nbsp;</span></td>
<td align="left"><span id="wordcacheSpan">&nbsp;</span></td>
</tr>
</tbody>
</table>
</form>
<p class="watchCrawler"> #(info)#
</fieldset>
<p class="watchCrawler" style="clear:both;">
#(info)#
<!-- 0 -->
::
<!-- 1 -->
@@ -155,11 +146,11 @@
Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds,
it may take some seconds until the first result appears there.</strong>
If you crawl any un-wanted pages, you can delete them <a href="IndexCreateWWWLocalQueue_p.html">here</a>.<br />
#(/info)# </p>
</fieldset>
<p style="clear:both;"></p>
#(/info)#
</p>
<!-- crawl queues -->
<p>See an <a href="/api/latency_p.xml">access timing</a></p>
<!-- crawl profile list -->
#(crawlProfilesShow)#::
<fieldset>
@@ -187,13 +178,13 @@
</tr>
#{/list}#
</table>
<h3>Crawled Pages</h3>
<p id="crawllist"></p>
</fieldset>
#(/crawlProfilesShow)#
<p>See an <a href="/api/latency_p.xml">access timing</a></p>
<iframe id="QueuesTable" src="IndexCreateQueues_p.html?embed=&urlsPerHost=1" width="100%" height="0" align="left" scrolling="no" marginheight="0" marginwidth="0" frameborder="0" ></iframe>
#%env/templates/footer.template%#
</body>
</html>

@@ -5,7 +5,7 @@
<div class="SubMenugroup">
<h3>Processing Monitor</h3>
<ul class="SubMenu">
<li><a href="/Crawler_p.html" class="MenuItemLink lock">Crawler Queues</a></li>
<li><a href="/Crawler_p.html" class="MenuItemLink lock">Crawler</a></li>
<li><a href="/IndexCreateLoaderQueue_p.html" class="MenuItemLink lock">Loader</a></li>
<li><a href="/IndexCreateParserErrors_p.html" class="MenuItemLink lock">Rejected URLs</a></li>
</ul>

@@ -3,9 +3,8 @@ BAR_IMG1="/env/grafics/green-block.png";
BAR_IMG2="/env/grafics/red-block.png";
WORDCACHEBAR_LENGTH=1/4;
var statusRPC;
var refreshInterval=3;
var refreshInterval=2;
var wait=0;
var changing=false; //change the interval
var statusLoaded=true;
@@ -50,6 +49,7 @@ function refresh(){
wait=refreshInterval;
statusLoaded=false;
requestStatus();
getRSS("/api/feed.xml?count=20&set=REMOTEINDEXING,LOCALINDEXING&time=" + (new Date()).getTime());
}
function requestStatus(){
@@ -90,10 +90,6 @@ function handleStatus(){
var wordCacheSize=getValue(getFirstChild(statusTag, "wordCacheSize"));
var wordCacheMaxSize=getValue(getFirstChild(statusTag, "wordCacheMaxSize"));
wordCacheNum=document.getElementById("wordcacheNum");
removeAllChildren(wordCacheNum);
wordCacheNum.appendChild(document.createTextNode(wordCacheSize+"/"+wordCacheMaxSize));
wordCacheSpan=document.getElementById("wordcacheSpan");
removeAllChildren(wordCacheSpan);
var img;
@@ -164,7 +160,6 @@ function putQueueState(queue, state) {
}
}
function shortenURL(url) {
if (url.length > 80) {
return url.substr(0, 80) + "...";
@@ -187,3 +182,17 @@ function createIndexingRow(queue, profile, initiator, depth, modified, anchor, u
row.appendChild(deletebutton);
return row;
}
crawllist_head = "<table cellpadding='2' cellspacing='1' ><tr class='TableHeader'><td width='50%'><strong>Title</strong></td><td width='50%'><strong>URL</strong></td></tr>";
crawllist_body = "";
crawllist_tail = "</table>";
function showRSS(RSS) {
var doc = document.getElementById("crawllist");
if (doc != null) {
for (var i=0; i<RSS.items.length; i++) {
crawllist_body = "<tr class='TableCellLight'><td><a href='ViewFile.html?action=info&urlHash=" + RSS.items[i].guid.value + "' class='small' target='_blank' title='" + RSS.items[i].link + "'>" + RSS.items[i].description + "</a></td><td><a href='ViewFile.html?action=info&urlHash=" + RSS.items[i].guid.value + "' class='small' target='_blank' title='" + RSS.items[i].link + "'>" + RSS.items[i].link + "</a></td></tr>" + crawllist_body;
}
doc.innerHTML = crawllist_head + crawllist_body + crawllist_tail;
}
return true;
}
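
The committed showRSS() builds the table by string concatenation and prepends each new row to crawllist_body, keeping the most recently received pages at the top (and, since crawllist_body is never cleared, the list grows across polls into a running history). Each row links to ViewFile.html via the item's guid, which the RSSMessage change below sets to the URL hash. An illustrative DOM-based equivalent for a single row (makeCrawlRow is not part of the commit; textContent is used so markup in a page title cannot inject HTML):

    function makeCrawlRow(item) {
        // item.guid.value is the URL hash, which ViewFile.html?action=info
        // resolves back to the indexed document
        var href = "ViewFile.html?action=info&urlHash=" + encodeURIComponent(item.guid.value);
        var row = document.createElement("tr");
        row.className = "TableCellLight";
        [item.description, item.link].forEach(function (text) {
            var cell = document.createElement("td");
            var a = document.createElement("a");
            a.href = href;
            a.className = "small";
            a.target = "_blank";
            a.title = item.link;
            a.textContent = text;   // escaped, unlike string-concatenated HTML
            cell.appendChild(a);
            row.appendChild(cell);
        });
        return row;
    }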

@@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.kelondro.data.meta.DigestURI;
public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
@@ -105,6 +106,15 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
this.map.put("pubDate", ISO8601Formatter.FORMATTER.format());
this.map.put("guid", artificialGuidPrefix + Integer.toHexString((title + description + link).hashCode()));
}
public RSSMessage(final String title, final String description, final DigestURI link) {
this.map = new ConcurrentHashMap<String, String>();
this.map.put("title", title);
this.map.put("description", description);
this.map.put("link", link.toNormalform(true, false));
this.map.put("pubDate", ISO8601Formatter.FORMATTER.format());
this.map.put("guid", ASCII.String(link.hash()));
}
public RSSMessage() {
this.map = new ConcurrentHashMap<String, String>();

@@ -2621,9 +2621,7 @@ public final class Switchboard extends serverSwitch
ASCII.getBytes(this.peers.mySeed().hash))
? EventChannel.LOCALINDEXING
: EventChannel.REMOTEINDEXING);
feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url().toNormalform(
true,
false)));
feed.addMessage(new RSSMessage("Indexed web page", dc_title, queueEntry.url()));
} catch ( final IOException e ) {
//if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase);
addURLtoErrorDB(
