<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Crawler</title>
  <!--
    Crawler monitor page template.
    Shows the crawl queue sizes, the index size, crawl progress indicators,
    a status message and the list of running crawl profiles.
    Placeholders and switches written in the template syntax are resolved on
    the server before the page is delivered. The body onload handler calls
    initCrawler (presumably defined in js/Crawler.js, to be confirmed), and the
    noscript hint below states that the page updates itself through JavaScript.
    Do not use template marker syntax inside comments of this file.
  -->
  #%env/templates/metas.template%#
  <script type="text/javascript" src="js/ajax.js"></script>
  <script type="text/javascript" src="js/xml.js"></script>
  <script type="text/javascript" src="js/html.js"></script>
  <script type="text/javascript" src="js/rss2.js"></script>
  <script type="text/javascript" src="js/query.js"></script>
  <script type="text/javascript" src="js/Crawler.js"></script>
  <!-- style for hypertree -->
  <link href="env/hypertree.css" rel="stylesheet" />
</head>
<body id="Crawler" onload="initCrawler();">
  #%env/templates/header.template%#
  #%env/templates/submenuCrawlMonitor.template%#
  <div id="api">
    <a href="api/status_p.xml" id="apilink"><img src="env/grafics/api.png" width="60" height="40" alt="API"/></a>
    <span>Click on this API button to see an XML with information about the crawler status</span>
  </div>
  <h2>Crawler</h2>
  <noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>

  <!-- Queue sizes. The empty href, src and title attributes of the state links
       are presumably filled in by script after load (not visible in this file). -->
  <fieldset id="queues" style="width:210px;float:left;">
    <legend>Queues</legend>
    <table border="0" class="watchCrawler">
      <tbody>
        <tr class="TableHeader">
          <th width="120">Queue<br/> </th>
          <th width="60">Size<br/> </th>
          <th width="30"><span class="glyphicon glyphicon-wrench"></span> </th>
        </tr>
        <tr class="TableCellLight">
          <td align="left"><a href="IndexCreateQueues_p.html?stack=LOCAL">Local Crawler</a></td>
          <td align="right"><span id="localcrawlerqueuesize">#[localCrawlSize]#</span></td>
          <td>
            <a href="" id="localcrawlerstateA">
              <img src="" alt="" style="width:12px; height:12px;" id="localcrawlerstateIMG" />
            </a>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Limit Crawler</td>
          <td align="right"><span id="limitcrawlerqueuesize">#[limitCrawlSize]#</span></td>
          <td>
            <a href="" title="" id="limitcrawlerstateA">
              <img src="" alt="" style="width:12px; height:12px;" id="limitcrawlerstateIMG" />
            </a>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left"><a href="IndexCreateQueues_p.html?stack=REMOTE">Remote Crawler</a></td>
          <td align="right"><span id="remotecrawlerqueuesize">#[remoteCrawlSize]#</span></td>
          <td>
            <a href="" title="" id="remotecrawlerstateA">
              <img src="" alt="" style="width:12px; height:12px;" id="remotecrawlerstateIMG" />
            </a>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left"><a href="IndexCreateQueues_p.html?stack=NOLOAD">No-Load Crawler</a></td>
          <td align="right"><span id="noloadcrawlerqueuesize">#[noloadCrawlSize]#</span></td>
          <td>
            <a href="" title="" id="noloadcrawlerstateA">
              <img src="" alt="" style="width:12px; height:12px;" id="noloadcrawlerstateIMG" />
            </a>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left"><a href="IndexCreateLoaderQueue_p.html">Loader</a> (<a href="PerformanceQueues_p.html#ThreadPoolSettings"><span id="loaderqueuemax">#[loaderMax]#</span></a>)</td>
          <td align="right"><span id="loaderqueuesize">#[loaderSize]#</span></td>
          <td> </td>
        </tr>
      </tbody>
    </table>
    <!-- Second alternative of this switch renders the button that terminates all crawls. -->
    #(terminate-button)#::
    <form action="/Crawler_p.html" method="get" role="form">
      <input type="hidden" name="queues_terminate_all" value="" />
      <button type="submit" class="btn btn-danger" onclick="return confirm('Confirm Termination of All Crawls')"><span class="glyphicon glyphicon-remove-circle"></span> Terminate All</button>
    </form>
    #(/terminate-button)#
  </fieldset>

  <!-- Index size: entry and segment counts per database. -->
  <fieldset id="indexsize" style="width:240px;float:left;">
    <legend>Index Size</legend>
    <table border="0" class="watchCrawler">
      <tbody>
        <tr class="TableHeader">
          <th width="130">Database<br/> </th>
          <th width="50">Entries<br/> </th>
          <th width="40">Seg-<br/>ments</th>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Documents<br/><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
          <td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
          <td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Webgraph Edges<br/><a href="#[webgraphSolrURL]#">solr search api</a></td>
          <td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
          <td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Citations<br/>(reverse link index)</td>
          <td align="right"><span id="citationSize">#[citationSize]#</span></td>
          <td align="right"><span id="citationSegmentCount">#[citationSegmentCount]#</span></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">RWIs<br/>(P2P Chunks)</td>
          <td align="right"><span id="rwipublictextSize">#[rwipublictextSize]#</span></td>
          <td align="right"><span id="rwipublictextSegmentCount">#[rwipublictextSegmentCount]#</span></td>
        </tr>
      </tbody>
    </table>
  </fieldset>

  <!-- Progress: crawl speed settings (submitted back to this page) and live indicators. -->
  <fieldset id="progress" style="width:500px;float:left;">
    <legend>Progress</legend>
    <form action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
      <table border="0" class="watchCrawler">
        <tbody>
          <tr class="TableHeader">
            <th width="160">Indicator<br/> </th>
            <th width="300" colspan="4">Level<br/> </th>
          </tr>
          <tr class="TableCellLight">
            <td align="left">Speed / PPM<br/>(Pages Per Minute)</td>
            <td align="left" colspan="4">
              <input id="customPPM" name="customPPM" type="number" min="10" max="30000" style="width:62px" value="#[customPPMdefault]#" />PPM
              <input id="latencyFactor" name="latencyFactor" type="number" min="0.1" max="3.0" step="0.1" style="width:40px" value="#[latencyFactorDefault]#" />LF
              <input id="MaxSameHostInQueue" name="MaxSameHostInQueue" type="number" min="1" max="30" style="width:32px" value="#[MaxSameHostInQueueDefault]#" />MH
              <input type="submit" name="crawlingPerformance" value="set" />
              (<a href="Crawler_p.html?crawlingPerformance=minimum">min</a>/<a href="Crawler_p.html?crawlingPerformance=maximum">max</a>)
            </td>
          </tr>
          <tr class="TableCellLight">
            <td align="left">Crawler PPM</td>
            <td align="left" width="60"><span id="ppmNum"> </span></td>
            <td align="left" width="260" colspan="3">
              <!-- progress is not a void element, so it needs an explicit end tag -->
              <progress id="ppmbar" max="30000" value="0" style="width:94%;"></progress>
            </td>
          </tr>
          <tr class="TableCellLight">
            <td align="left" valign="top" rowspan="2">Postprocessing Progress <span id="postprocessing_speed"> </span><br/><span id="postprocessing_status"> </span></td>
            <td align="left" width="40"><span id="postprocessing_remainingTimeMinutes">0</span>:<span id="postprocessing_remainingTimeSeconds">0</span></td>
            <td align="left" width="260" colspan="3">
              <span id="postprocessing_bar"><progress id="postprocessingBar" max="100" value="0" style="width:94%;"></progress></span>
            </td>
          </tr>
          <tr class="TableCellLight">
            <td align="left">pending:</td>
            <td align="left">collection=<span id="postprocessing_collection"> </span></td>
            <td align="left">webgraph=<span id="postprocessing_webgraph"> </span></td>
            <td> </td>
          </tr>
          <tr class="TableCellLight">
            <td align="left">Traffic (Crawler)</td>
            <td align="left" colspan="2"><span id="trafficCrawler"> </span> MB</td>
            <td colspan="2"> </td>
          </tr>
          <tr class="TableCellLight">
            <td align="left">Load</td>
            <td align="left" colspan="2"><span id="load"> </span></td>
            <td colspan="2"> </td>
          </tr>
        </tbody>
      </table>
    </form>
  </fieldset>

  <script>
    // Height applied by the previous run of setTableSize. It must live outside
    // the function: declared inside, it is hoisted as a fresh local on every
    // call and the comparison below can never detect an unchanged height.
    var lastMaxh = null;

    /**
     * Gives the three floated fieldsets (queues, indexsize, progress) the same
     * height: the tallest second child (the element right after the legend)
     * plus 42 pixels. The 42 is kept from the original code; it is presumably
     * room for legend and padding, which cannot be verified from this file.
     * The styles are only rewritten when the computed height has changed.
     */
    function setTableSize() {
      var maxh = Math.max(
        document.getElementById("progress").children[1].clientHeight,
        document.getElementById("indexsize").children[1].clientHeight,
        document.getElementById("queues").children[1].clientHeight
      ) + 42;
      if (lastMaxh !== maxh) {
        lastMaxh = maxh;
        document.getElementById("indexsize").style.height = maxh + "px";
        document.getElementById("progress").style.height = maxh + "px";
        document.getElementById("queues").style.height = maxh + "px";
      }
    }

    // Re-check once per second. A function reference is passed instead of a
    // code string so that nothing has to be evaluated from text.
    window.setInterval(setTableSize, 1000);
  </script>

  <!-- Status message: exactly one of the numbered alternatives below is rendered. -->
  <p class="watchCrawler" style="clear:both;">
    #(info)#
    <!-- 0 -->
    ::
    <!-- 1 -->
    Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db and restart.
    ::
    <!-- 2 -->
    Error: #[errmsg]#
    ::
    <!-- 3 -->
    Application not yet initialized. Sorry. Please wait some seconds and repeat the request.
    ::
    <!-- 4 -->
    <strong>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with crawl root "#[crawlingStart]#".</strong> Please try again with different filter.
    ::
    <!-- 5 -->
    Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br />
    ::
    <!-- 6 -->
    Error with URL input "#[crawlingStart]#": #[error]#
    ::
    <!-- 7 -->
    Error with file input "#[crawlingStart]#": #[error]#
    ::
    <!-- 8 -->
    Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds, it may take some seconds until the first result appears there.</strong> If you crawl any un-wanted pages, you can delete them <a href="IndexCreateQueues_p.html?stack=LOCAL">here</a>.<br />
    #(/info)#
  </p>

  <!-- crawl queues -->
  #(info-queue)#::<div class="alert alert-warning">#[message]#</div>#(/info-queue)#

  <!-- crawl profile list -->
  #(crawlProfilesShow)#::
  <fieldset>
    <legend>Running Crawls (#[count]#)</legend>
    <!-- Outer layout table: profile list on the left, link structure toggle on
         the right. Its closing tags are part of every link structure
         alternative further down. -->
    <table width="96%">
      <tr><td>
        <table border="0" summary="A list of crawl profiles and their current settings.">
          <colgroup>
            <col width="16" />
            <col width="140"/>
          </colgroup>
          <tr class="TableHeader">
            <td><strong>Name</strong></td>
            #(debug)#::<td><strong>Count</strong></td>#(/debug)#
            <td><strong>Status</strong></td>
          </tr>
          #{list}#
          <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
            <td>#[name]#</td>
            #(debug)#::<td>#[count]#</td>#(/debug)#
            <td>#(terminateButton)#::
              <div style="text-decoration:blink;float:left;">Running</div>
              <form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>
                <input type="hidden" name="handle" value="#[handle]#" />
                <input type="submit" name="terminate" value="Terminate" class="btn btn-danger btn-xs"/>
              </div></form>
              #(/terminateButton)#
            </td>
          </tr>
          #{/list}#
        </table>
      </td>
      #(linkstructure)#
      <!-- 0: graph hidden, offer to show it -->
      <td>
        <form style="float:right;" action="Crawler_p.html"><input type="submit" name="showwebstructuregraph" class="btn btn-default btn-xs" value="show link structure"/></form>
      </td></tr></table>
      ::
      <!-- 1: graph drawn by the hypertree script -->
      <td>
        <form style="float:right;" action="Crawler_p.html"><input type="submit" name="hidewebstructuregraph" class="btn btn-default btn-xs" value="hide graphic"/></form>
      </td></tr></table>
      <script src="/js/d3.v3.min.js"></script>
      <script src="/js/hypertree.js"></script>
      <div id="linkstructure"></div>
      <script>
        // ready() expects a callback. Passing the result of a direct call would
        // run linkstructure at once and register nothing, so wrap the call.
        $(document).ready(function () {
          linkstructure("#[hosts]#", "#linkstructure", 1280, 720, 3000, 700);
        });
      </script>
      ::
      <!-- 2: graph rendered as a server generated picture that is reloaded repeatedly -->
      <td>
        <form style="float:right;" action="Crawler_p.html"><input type="submit" name="hidewebstructuregraph" class="btn btn-default btn-xs" value="hide graphic"/></form>
      </td></tr></table>
      <script type="text/javascript">
        // Picture URL without the running index. A changing idx parameter is
        // appended on every reload, which gives each request a distinct URL.
        var imagestub = "WebStructurePicture_p.png?host=#[hosts]#&depth=4&width=1024&height=512&nodes=600&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333";
        var idx = 0;

        setTimeout(function () { doanimation(500); }, 500);

        /**
         * Reloads the picture with id WebPicture and schedules the next reload.
         * nexttimeout is the delay in milliseconds before the next reload. The
         * delay passed on to that next run grows by a factor of 1.2, and is set
         * to 3000 once the current delay exceeds 3000. Nothing happens, and
         * nothing more is scheduled, when the picture element is missing.
         */
        function doanimation(nexttimeout) {
          var accessPicture = document.getElementById("WebPicture");
          if (accessPicture != null) {
            idx++;
            accessPicture.src = imagestub + "&idx=" + idx;
            var followingTimeout = nexttimeout > 3000 ? 3000 : nexttimeout * 1.2;
            setTimeout(function () { doanimation(followingTimeout); }, nexttimeout);
          }
        }
      </script>
      <div style="clear:both; text-align:left;">
        <img id="WebPicture" src="env/grafics/invisible.png"/>
      </div>
      #(/linkstructure)#
    <h3>Crawled Pages</h3>
    <p id="crawllist"></p>
  </fieldset>
  #(/crawlProfilesShow)#
  #%env/templates/footer.template%#
</body>
</html>