ready-prepared crawl list, but at the stacks of the domains that are stored for balanced crawling. This also affects the balancer, since it no longer needs to prepare the pre-selected crawl list for monitoring. As a result:
- it is no longer possible to see the exact order of the next links to be crawled, since that depends on the actual state of the balancer stack at the moment the next URL is requested for loading
- the balancer works better, since the next URL can be selected according to the current situation rather than according to a pre-selected order
parent 7e4e3fe5b6
commit 9ad1d8dde2
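For context, the host-stack idea the commit message describes can be sketched in a few lines of Java. This is illustrative only: the class, the method names, and the delay value are assumptions, not YaCy's actual Balancer code. It shows why the "next URL" cannot be listed in advance: the pick depends on the per-host stacks and their access times at the moment of the request.

// Hypothetical sketch of balanced, per-host URL selection (not YaCy's Balancer API).
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;

public class HostStackBalancer {
    private final Map<String, Deque<String>> hostStacks = new HashMap<>();
    private final Map<String, Long> lastAccess = new HashMap<>();
    private static final long MIN_DELAY_MS = 500; // assumed per-host politeness delay

    public synchronized void push(final String host, final String url) {
        hostStacks.computeIfAbsent(host, h -> new ArrayDeque<String>()).push(url);
    }

    // Select a URL from the host that was accessed longest ago and is outside
    // its politeness delay; null means no host is ready right now.
    public synchronized String next() {
        final long now = System.currentTimeMillis();
        String bestHost = null;
        long bestAge = -1;
        for (final Map.Entry<String, Deque<String>> e : hostStacks.entrySet()) {
            if (e.getValue().isEmpty()) continue;
            final long age = now - lastAccess.getOrDefault(e.getKey(), 0L);
            if (age >= MIN_DELAY_MS && age > bestAge) {
                bestAge = age;
                bestHost = e.getKey();
            }
        }
        if (bestHost == null) return null;
        lastAccess.put(bestHost, now);
        return hostStacks.get(bestHost).pop();
    }
}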
IndexCreateQueues_p.html
@@ -0,0 +1,95 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': '#[queuename]#' Crawl Queue</title>
#%env/templates/metas.template%#
</head>
<body id="IndexCreateQueues">
<div id="fullcontent">
#(embed)#
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>'#[queuename]#' Crawl Queue</h2>
::#(/embed)#

#(crawler)#
<p>This crawler queue is empty</p>
::
#(embed)#
<form action="IndexCreateQueues_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
Delete Entries:
<input type="text" name="pattern" value="#[deletepattern]#" size="40" maxlength="200" />
<select name="option" size="1">
<option value="5">Initiator</option>
<option value="3">Profile</option>
<option value="4">Depth</option>
<option value="6">Modified Date</option>
<option value="2">Anchor Name</option>
<option value="1" selected="selected">URL</option>
</select>
<input type="hidden" name="stack" value="#[queuename]#" />
<input type="submit" name="delete" value="Delete" />
</fieldset>
</form>
::#(/embed)#
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="5" />
<col width="10" />
<col width="30" />
<col width="10" />
<col width="10" />
<col width="10" />
<col width="10" />
<col width="10" />
<col width="10" />
<col />
</colgroup>
<tr class="TableHeader">
<th>Count</th>
<th>Delta/ms</th>
<th>Host</th>
<th>Initiator</th>
<th>Profile</th>
<th>Depth</th>
<th>Modified Date</th>
<th>Anchor Name</th>
<th>Delta/ms</th>
<th>URL</th>
</tr>
#{host}#
<tr class="TableCellDark">
<td>#[hostcount]#</td>
<td>#[hostdelta]#</td>
<td><a href="IndexCreateQueues_p.html?#(embed)#::embed=&#(/embed)#delete=&stack=#[queuename]#&option=1&pattern=.*#[hostname]#.*&urlsPerHost=#[urlsPerHost]#"><img src="env/grafics/trash.gif" alt="Delete" /></a> #[hostname]#</td>
<td colspan="7"></td>
</tr>
#{list}#
<tr class="TableCellLight">
<td colspan="3"></td>
<td>#[initiator]#</td>
<td>#[profile]#</td>
<td>#[depth]#</td>
<td>#[modified]#</td>
<td>#[anchor]#</td>
<td>#[delta]#</td>
<td><a href="#[url]#">#[url]#</a></td>
</tr>
#{/list}#
#{/host}#
</table>
#(/crawler)#
#(embed)#
#%env/templates/footer.template%#
::#(/embed)#
</div>
<script type="text/javascript">
<!--
parentPage = parent.document.getElementById('QueuesTable');
if (parentPage != null) parentPage.height = document.getElementById('fullcontent').offsetHeight + 30;
-->
</script>
</body>
</html>
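The template above uses YaCy's servlet template markup: #[x]# is a value placeholder, #(x)#...::...#(/x)# picks one of several alternatives via a numeric property, #{x}#...#{/x}# repeats a section, and #%file%# includes another template. Below is a sketch of how a servlet could fill the nested host/list loops, following the underscore-joined key scheme visible in the deleted queues_p.java later in this diff; the concrete key names and values are illustrative, not copied from the actual IndexCreateQueues_p.java.

import de.anomic.server.serverObjects;

public class QueueViewExample {
    // Fill one host row with one queued URL beneath it (illustrative keys only).
    public static serverObjects example() {
        final serverObjects prop = new serverObjects();
        prop.put("crawler", "1");                      // choose the non-empty branch of #(crawler)#
        prop.put("crawler_host_0_hostcount", 42);      // values for the first #{host}# row
        prop.put("crawler_host_0_hostdelta", 500);
        prop.putHTML("crawler_host_0_hostname", "example.org");
        prop.putHTML("crawler_host_0_list_0_url", "http://example.org/");  // nested #{list}# row
        prop.put("crawler_host_0_list", 1);            // one #{list}# iteration under this host
        prop.put("crawler_host", 1);                   // one #{host}# iteration in total
        return prop;
    }
}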
IndexCreateWWWGlobalQueue_p.html
@@ -1,58 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Global Crawl Queue</title>
#%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWGlobalQueue">
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>Global Crawl Queue</h2>
<p>
This queue stores the urls that shall be sent to other peers to perform a remote crawl.
If there is no peer for remote crawling available, the links are crawled locally.
</p>
#(crawler-queue)#
<p>The global crawler queue is empty</p>
::
<form action="IndexCreateWWWGlobalQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<input type="submit" name="clearcrawlqueue" value="clear global crawl queue" />
</fieldset>
</form>
<p>There are <strong>#[num]#</strong> entries in the global crawler queue. Showing <strong>#[show-num]#</strong> most recent entries.</p>
<p>Show last <a href="IndexCreateWWWGlobalQueue_p.html?limit=50">50</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=100">100</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=250">250</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=500">500</a> entries.</p>
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="60" span="2" />
<col width="10" />
<col width="80" />
<col width="180" />
<col />
<col width="10" />
</colgroup>
<tr class="TableHeader">
<th>Initiator</th>
<th>Profile</th>
<th>Depth</th>
<th>Modified Date</th>
<th>Anchor Name</th>
<th>URL</th>
<th>Delete</th>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[initiator]#</td>
<td>#[profile]#</td>
<td>#[depth]#</td>
<td>#[modified]#</td>
<td>#[anchor]#</td>
<td><a href="#[url]#">#[url]#</a></td>
<td><a href="IndexCreateWWWGlobalQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
</tr>
#{/list}#
</table>
#(/crawler-queue)#
#%env/templates/footer.template%#
</body>
</html>
IndexCreateWWWGlobalQueue_p.java
@@ -1,125 +0,0 @@
// IndexCreateWWWGlobalQueue_p.java
// -------------------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
//$LastChangedDate$
//$LastChangedRevision$
//$LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// You must compile this file with
// javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;

import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class IndexCreateWWWGlobalQueue_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();

        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);

            if (post.containsKey("clearcrawlqueue")) {
                final int c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
                sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.LIMIT);
                try { sb.cleanProfiles(); } catch (final InterruptedException e) { /* Ignore this */ }
                /*
                int c = 0;
                while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.StackType.LIMIT) > 0) {
                    urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.StackType.LIMIT).hash();
                    if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
                }
                */
                prop.put("info", "3"); // crawling queue cleared
                prop.putNum("info_numEntries", c);
            } else if (post.containsKey("deleteEntry")) {
                final String urlHash = post.get("deleteEntry");
                sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
                prop.put("LOCATION", "");
                return prop;
            }
        }

        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
        if (stackSize == 0) {
            prop.put("crawler-queue", "0");
        } else {
            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, showLimit);

            Request urle;
            boolean dark = true;
            Seed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_" + showNum + "_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                    prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth());
                    prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()));
                    prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toNormalform(false, true));
                    prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash());
                    dark = !dark;
                    showNum++;
                } else {
                    stackSize--;
                }
            }
            prop.putNum("crawler-queue_show-num", showNum); // showing show-num most recent
            prop.putNum("crawler-queue_num", stackSize); // num entries
            prop.putNum("crawler-queue_list", showNum);
        }

        // return rewrite properties
        return prop;
    }
}
IndexCreateWWWLocalQueue_p.html
@@ -1,69 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Local Crawl Queue</title>
#%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWLocalQueue">
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>Local Crawl Queue</h2>
<p>
This queue stores the urls that shall be crawled locally by this peer.
It may also contain urls that are computed by the proxy-prefetch.
</p>

#(crawler-queue)#
<p>The local crawler queue is empty</p>
::
<form action="IndexCreateWWWLocalQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
Delete Entries:
<input type="text" name="pattern" value=".*" size="40" maxlength="200" />
<select name="option" size="1">
<option value="5">Initiator</option>
<option value="3">Profile</option>
<option value="4">Depth</option>
<option value="6">Modified Date</option>
<option value="2">Anchor Name</option>
<option value="1" selected="selected">URL</option>
</select>
<input type="submit" name="deleteEntries" value="Delete" /><em>This may take quite a long time.</em>
</fieldset>
</form>
<p>There are <strong>#[num]#</strong> entries in the local crawler queue. Showing <strong>#[show-num]#</strong> most recent entries.</p>
<p>Show last <a href="IndexCreateWWWLocalQueue_p.html?limit=50">50</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=100">100</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=250">250</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=500">500</a> entries.</p>
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="60" span="2" />
<col width="10" />
<col width="80" />
<col width="180" />
<col />
<col width="10" />
</colgroup>
<tr class="TableHeader">
<th>Initiator</th>
<th>Profile</th>
<th>Depth</th>
<th>Modified Date</th>
<th>Anchor Name</th>
<th>URL</th>
<th>Delete</th>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[initiator]#</td>
<td>#[profile]#</td>
<td>#[depth]#</td>
<td>#[modified]#</td>
<td>#[anchor]#</td>
<td><a href="#[url]#">#[url]#</a></td>
<td><a href="IndexCreateWWWLocalQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
</tr>
#{/list}#
</table>
#(/crawler-queue)#
#%env/templates/footer.template%#
</body>
</html>
IndexCreateWWWRemoteQueue_p.html
@@ -1,65 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Remote Crawl Queue</title>
#%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWGlobalQueue">
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
<h2>Remote Crawl Queue</h2>
<p>
This queue stores the urls that other peers sent to you in order to perform a remote crawl for them.
</p>
#(crawler-queue)#
<p>The remote crawler queue is empty</p>
::
<form action="IndexCreateWWWRemoteQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<input type="submit" name="clearcrawlqueue" value="clear remote crawl queue" />
</fieldset>
</form>
<p>
There are <strong>#[num]#</strong> entries in the remote crawler queue.
Showing <strong>#[show-num]#</strong> most recent entries.
</p>
<p>
Show last <a href="IndexCreateWWWRemoteQueue_p.html?limit=50">50</a> |
<a href="IndexCreateWWWRemoteQueue_p.html?limit=100">100</a> |
<a href="IndexCreateWWWRemoteQueue_p.html?limit=250">250</a> |
<a href="IndexCreateWWWRemoteQueue_p.html?limit=500">500</a> entries.
</p>
<table border="0" cellpadding="2" cellspacing="1">
<colgroup>
<col width="60" span="2" />
<col width="10" />
<col width="80" />
<col width="180" />
<col />
<col width="10" />
</colgroup>
<tr class="TableHeader">
<th>Initiator</th>
<th>Profile</th>
<th>Depth</th>
<th>Modified Date</th>
<th>Anchor Name</th>
<th>URL</th>
<th>Delete</th>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[initiator]#</td>
<td>#[profile]#</td>
<td>#[depth]#</td>
<td>#[modified]#</td>
<td>#[anchor]#</td>
<td><a href="#[url]#">#[url]#</a></td>
<td><a href="IndexCreateWWWRemoteQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
</tr>
#{/list}#
</table>
#(/crawler-queue)#
#%env/templates/footer.template%#
</body>
</html>
IndexCreateWWWRemoteQueue_p.java
@@ -1,120 +0,0 @@
// IndexCreateWWWRemoteQueue_p.java
// -------------------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
// last major change: 04.07.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// You must compile this file with
// javac -classpath .:../classes IndexCreateWWWRemoteQueue_p.java
// if the shell's current path is HTROOT

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;

import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;

public class IndexCreateWWWRemoteQueue_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final servletProperties prop = new servletProperties();
        final Switchboard sb = (Switchboard) env;

        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);

            if (post.containsKey("clearcrawlqueue")) {
                final int c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.REMOTE);
                sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.REMOTE);
                try { sb.cleanProfiles(); } catch (final InterruptedException e) { /* Ignore this */ }
                /*
                int c = 0;
                while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.StackType.LIMIT) > 0) {
                    urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.StackType.LIMIT).hash();
                    if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
                }
                */
                prop.put("info", "3"); // crawling queue cleared
                prop.putNum("info_numEntries", c);
            } else if (post.containsKey("deleteEntry")) {
                final String urlHash = post.get("deleteEntry");
                sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
                prop.put("LOCATION", "");
                return prop;
            }
        }

        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.REMOTE);
        if (stackSize == 0) {
            prop.put("crawler-queue", "0");
        } else {
            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.REMOTE, showLimit);

            Request urle;
            boolean dark = true;
            Seed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_" + showNum + "_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                    prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth());
                    prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()));
                    prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString());
                    prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash());
                    dark = !dark;
                    showNum++;
                } else {
                    stackSize--;
                }
            }
            prop.putNum("crawler-queue_show-num", showNum); // showing show-num most recent
            prop.putNum("crawler-queue_num", stackSize); // num entries
            prop.putNum("crawler-queue_list", showNum);
        }
        return prop;
    }
}
queues_p.java
@@ -1,124 +0,0 @@
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class queues_p {

    public static final String STATE_RUNNING = "running";
    public static final String STATE_PAUSED = "paused";

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        //wikiCode wikiTransformer = new wikiCode(switchboard);
        final serverObjects prop = new serverObjects();
        Segment segment = null;
        final boolean html = post != null && post.containsKey("html");
        prop.setLocalized(html);
        if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
            segment = sb.indexSegments.segment(post.get("segment"));
        }
        if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
        prop.put("rejected", "0");
        //int showRejectedCount = 10;

        Seed initiator;

        // index size
        prop.putNum("urlpublictextSize", segment.urlMetadata().size());
        prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());

        // loader queue
        prop.putNum("loaderSize", sb.crawlQueues.workerSize());
        prop.putNum("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
        if (sb.crawlQueues.workerSize() == 0) {
            prop.put("list-loader", "0");
        } else {
            final Request[] w = sb.crawlQueues.activeWorkerEntries();
            int count = 0;
            for (final Request r : w) {
                if (r == null) continue;
                prop.put("list-loader_" + count + "_profile", r.profileHandle());
                initiator = sb.peers.getConnected((r.initiator() == null) ? "" : ASCII.String(r.initiator()));
                prop.putHTML("list-loader_" + count + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                prop.put("list-loader_" + count + "_depth", r.depth());
                prop.putXML("list-loader_" + count + "_url", r.url().toString());
                count++;
            }
            prop.put("list-loader", count);
        }

        // local crawl queue
        prop.putNum("localCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
        prop.put("localCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
        addNTable(sb, prop, "list-local", sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, Math.min(10, stackSize)));

        // global crawl queue
        prop.putNum("limitCrawlSize", sb.crawlQueues.limitCrawlJobSize());
        prop.put("limitCrawlState", STATE_RUNNING);
        stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);

        // remote crawl queue
        prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
        prop.put("remoteCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
        stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);

        if (stackSize == 0) {
            prop.put("list-remote", "0");
        } else {
            addNTable(sb, prop, "list-remote", sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, Math.min(10, stackSize)));
        }

        // noload crawl queue
        prop.putNum("noloadCrawlSize", sb.crawlQueues.noloadCrawlJobSize());
        prop.put("noloadCrawlState", STATE_RUNNING);
        //stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.NOLOAD);

        // return rewrite properties
        return prop;
    }

    public static final void addNTable(final Switchboard sb, final serverObjects prop, final String tableName, final List<Request> crawlerList) {

        int showNum = 0;
        Seed initiator;
        for (final Request urle : crawlerList) {
            if ((urle != null) && (urle.url() != null)) {
                initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : UTF8.String(urle.initiator()));
                prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle());
                prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                prop.put(tableName + "_" + showNum + "_depth", urle.depth());
                prop.put(tableName + "_" + showNum + "_modified", daydate(urle.appdate()));
                prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
                prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
                prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
                showNum++;
            }
        }
        prop.put(tableName, showNum);
    }
}
queues_p.xml
@@ -1,71 +0,0 @@
<?xml version="1.0"?>
<queues>
  <dbsize>
    <urlpublictext>#[urlpublictextSize]#</urlpublictext>
    <rwipublictext>#[rwipublictextSize]#</rwipublictext>
  </dbsize>
  <loaderqueue>
    <size>#[loaderSize]#</size>
    <max>#[loaderMax]#</max>
    #{list-loader}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <url>#[url]#</url>
    </entry>
    #{/list-loader}#
  </loaderqueue>
  <localcrawlerqueue>
    <size>#[localCrawlSize]#</size>
    <state>#[localCrawlState]#</state>
    #{list-local}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-local}#
  </localcrawlerqueue>
  <limitcrawlerqueue>
    <size>#[limitCrawlSize]#</size>
    <state>#[limitCrawlState]#</state>
    #{list-limit}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-limit}#
  </limitcrawlerqueue>
  <remotecrawlerqueue>
    <size>#[remoteCrawlSize]#</size>
    <state>#[remoteCrawlState]#</state>
    #{list-remote}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-remote}#
  </remotecrawlerqueue>
  <noloadcrawlerqueue>
    <size>#[noloadCrawlSize]#</size>
    <state>#[noloadCrawlState]#</state>
  </noloadcrawlerqueue>
</queues>
status_p.xml
@@ -1,35 +1,52 @@
<?xml version="1.0"?>
<status>
  <ppm>#[ppm]#</ppm>

  <wordCacheSize>#[wordCacheSize]#</wordCacheSize>
  <wordCacheMaxSize>#[wordCacheMaxSize]#</wordCacheMaxSize>

  <memory>
    <free>#[freeMemory]#</free>
    <total>#[totalMemory]#</total>
    <max>#[maxMemory]#</max>
  </memory>

  <processors>#[processors]#</processors>

  <traffic>
    <in>#[trafficIn]#</in>
    <proxy>#[trafficProxy]#</proxy>
    <crawler>#[trafficCrawler]#</crawler>
  </traffic>

  <dbsize>
    <urlpublictext>#[urlpublictextSize]#</urlpublictext>
    <rwipublictext>#[rwipublictextSize]#</rwipublictext>
  </dbsize>

  <loaderqueue>
    <size>#[loaderSize]#</size>
    <max>#[loaderMax]#</max>
  </loaderqueue>

  <localcrawlerqueue>
    <size>#[localCrawlSize]#</size>
    <state>#[localCrawlState]#</state>
  </localcrawlerqueue>

  <limitcrawlerqueue>
    <size>#[limitCrawlSize]#</size>
    <state>#[limitCrawlState]#</state>
  </limitcrawlerqueue>

  <remotecrawlerqueue>
    <size>#[remoteCrawlSize]#</size>
    <state>#[remoteCrawlState]#</state>
  </remotecrawlerqueue>

  <noloadcrawlerqueue>
    <size>#[noloadCrawlSize]#</size>
    <state>#[noloadCrawlState]#</state>
  </noloadcrawlerqueue>

  <memory>
    <free>#[freeMemory]#</free>
    <total>#[totalMemory]#</total>
    <max>#[maxMemory]#</max>
  </memory>
  <processors>#[processors]#</processors>
  <traffic>
    <in>#[trafficIn]#</in>
    <proxy>#[trafficProxy]#</proxy>
    <crawler>#[trafficCrawler]#</crawler>
  </traffic>
</status>
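With the queue sizes merged into status_p.xml (they previously lived only in the removed queues_p.xml), one request is enough to monitor all crawler queues. Below is a minimal client sketch; the peer address, port 8090 and the api/ path are assumptions for a default installation, and the _p suffix means the page requires admin authentication.

import java.net.URL;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

public class CrawlerQueueStatus {
    public static void main(final String[] args) throws Exception {
        // Fetch and parse the merged status document (admin credentials may be needed).
        final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
                .parse(new URL("http://localhost:8090/api/status_p.xml").openStream());
        final String[] queues = {"loaderqueue", "localcrawlerqueue", "limitcrawlerqueue",
                                 "remotecrawlerqueue", "noloadcrawlerqueue"};
        for (final String name : queues) {
            final Element q = (Element) doc.getElementsByTagName(name).item(0);
            final String size = q.getElementsByTagName("size").item(0).getTextContent();
            System.out.println(name + " size: " + size);
        }
    }
}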
Binary image added (932 B).