commit 4f92389550
@@ -0,0 +1,12 @@
#!/bin/bash
cd "$(dirname "$0")"
port=$(grep ^port= ../DATA/SETTINGS/yacy.conf | cut -d= -f2)
pw=$(grep ^adminAccountBase64MD5= ../DATA/SETTINGS/yacy.conf | cut -d= -f2)

if which curl &>/dev/null; then
    curl -s --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1"
elif which wget &>/dev/null; then
    wget -q -t 1 --timeout=5 --header "Authorization: realm=$pw" "http://127.0.0.1:$port/$1"
else
    exit 1
fi
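The script above reads the peer's HTTP port and the admin password hash from yacy.conf, then forwards a single servlet path to the local peer with admin authorization, preferring curl and falling back to wget. A minimal usage sketch; the file name apicall.sh and its location in the bin/ directory are assumptions, not part of this diff:

    # assuming the script was saved as bin/apicall.sh inside the YaCy installation
    cd yacy/bin
    ./apicall.sh "Status.html" > status.html    # fetch a servlet page as the admin user
    ./apicall.sh "IndexCreateQueues_p.html?stack=LOCAL"   # protected (_p) pages work too
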
@@ -0,0 +1,95 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': '#[queuename]#' Crawl Queue</title>
  #%env/templates/metas.template%#
</head>
<body id="IndexCreateQueues">
  <div id="fullcontent">
    #(embed)#
    #%env/templates/header.template%#
    #%env/templates/submenuCrawlMonitor.template%#
    <h2>'#[queuename]#' Crawl Queue</h2>
    ::#(/embed)#

    #(crawler)#
    <p>This crawler queue is empty</p>
    ::
    #(embed)#
    <form action="IndexCreateQueues_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
      <fieldset>
        Delete Entries:
        <input type="text" name="pattern" value="#[deletepattern]#" size="40" maxlength="200" />
        <select name="option" size="1">
          <option value="5">Initiator</option>
          <option value="3">Profile</option>
          <option value="4">Depth</option>
          <option value="6">Modified Date</option>
          <option value="2">Anchor Name</option>
          <option value="1" selected="selected">URL</option>
        </select>
        <input type="hidden" name="stack" value="#[queuename]#" />
        <input type="submit" name="delete" value="Delete" />
      </fieldset>
    </form>
    ::#(/embed)#
    <table border="0" cellpadding="2" cellspacing="1">
      <colgroup>
        <col width="5" />
        <col width="10" />
        <col width="30" />
        <col width="10" />
        <col width="10" />
        <col width="10" />
        <col width="10" />
        <col width="10" />
        <col width="10" />
        <col />
      </colgroup>
      <tr class="TableHeader">
        <th>Count</th>
        <th>Delta/ms</th>
        <th>Host</th>
        <th>Initiator</th>
        <th>Profile</th>
        <th>Depth</th>
        <th>Modified Date</th>
        <th>Anchor Name</th>
        <th>Delta/ms</th>
        <th>URL</th>
      </tr>
      #{host}#
      <tr class="TableCellDark">
        <td>#[hostcount]#</td>
        <td>#[hostdelta]#</td>
        <td><a href="IndexCreateQueues_p.html?#(embed)#::embed=&#(/embed)#delete=&stack=#[queuename]#&option=1&pattern=.*#[hostname]#.*&urlsPerHost=#[urlsPerHost]#"><img src="env/grafics/trash.gif" alt="Delete host" /></a> #[hostname]#</td>
        <td colspan="7"></td>
      </tr>
      #{list}#
      <tr class="TableCellLight">
        <td colspan="3"></td>
        <td>#[initiator]#</td>
        <td>#[profile]#</td>
        <td>#[depth]#</td>
        <td>#[modified]#</td>
        <td>#[anchor]#</td>
        <td>#[delta]#</td>
        <td><a href="#[url]#">#[url]#</a></td>
      </tr>
      #{/list}#
      #{/host}#
    </table>
    #(/crawler)#
    #(embed)#
    #%env/templates/footer.template%#
    ::#(/embed)#
  </div>
  <script type="text/javascript">
  <!--
    parentPage = parent.document.getElementById('QueuesTable');
    if (parentPage != null) parentPage.height = document.getElementById('fullcontent').offsetHeight + 30;
  -->
  </script>
</body>
</html>
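The #(embed)# sections and the trailing script let this template serve either a standalone page or an iframe fragment: with the embed parameter set, the header, footer, and delete form toggle off, and the script resizes a parent element with id QueuesTable to fit the content. A hedged sketch of fetching both variants with the wrapper script from the top of this commit (the script name apicall.sh is an assumption):

    ./apicall.sh "IndexCreateQueues_p.html?stack=LOCAL" > full_page.html    # full page with header/footer
    ./apicall.sh "IndexCreateQueues_p.html?stack=LOCAL&embed=" > embed.html # bare fragment for an iframe
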
@@ -1,192 +1,169 @@
// IndexCreateWWWLocalQueue_p.java
// -------------------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
//$LastChangedDate$
//$LastChangedRevision$
//$LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// You must compile this file with
// javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;

import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.CrawlSwitchboard;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class IndexCreateWWWLocalQueue_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    private static final int INVALID   = 0;
    private static final int URL       = 1;
    private static final int ANCHOR    = 2;
    private static final int PROFILE   = 3;
    private static final int DEPTH     = 4;
    private static final int INITIATOR = 5;
    private static final int MODIFIED  = 6;

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();

        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);

            if (post.containsKey("deleteEntries")) {
                int c = 0;

                final String pattern = post.get("pattern", ".*").trim();
                final int option = post.getInt("option", INVALID);
                if (".*".equals(pattern)) {
                    c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
                    sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.CORE);
                    try { sb.cleanProfiles(); } catch (final InterruptedException e) {/* ignore this */}
                } else if (option > INVALID) {
                    try {
                        // compiling the regular expression
                        final Pattern compiledPattern = Pattern.compile(pattern);

                        if (option == PROFILE) {
                            // search and delete the crawl profile (_much_ faster, independent of queue size)
                            // XXX: what to do about the annoying LOST PROFILE messages in the log?
                            CrawlProfile entry;
                            for (final byte[] handle: sb.crawler.getActive()) {
                                entry = sb.crawler.getActive(handle);
                                final String name = entry.name();
                                if (name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE))
                                    continue;
                                if (compiledPattern.matcher(name).find()) sb.crawler.removeActive(entry.handle().getBytes());
                            }
                        } else {
                            // iterating through the list of URLs
                            final Iterator<Request> iter = sb.crawlQueues.noticeURL.iterator(NoticedURL.StackType.CORE);
                            Request entry;
                            final List<byte[]> removehashes = new ArrayList<byte[]>();
                            while (iter.hasNext()) {
                                if ((entry = iter.next()) == null) continue;
                                String value = null;

                                location: switch (option) {
                                    case URL:       value = (entry.url() == null) ? null : entry.url().toString(); break location;
                                    case ANCHOR:    value = entry.name(); break location;
                                    case DEPTH:     value = Integer.toString(entry.depth()); break location;
                                    case INITIATOR:
                                        value = (entry.initiator() == null || entry.initiator().length == 0) ? "proxy" : ASCII.String(entry.initiator());
                                        break location;
                                    case MODIFIED:  value = daydate(entry.appdate()); break location;
                                    default: value = null; break location;
                                }

                                if (value != null && compiledPattern.matcher(value).matches()) removehashes.add(entry.url().hash());
                            }
                            Log.logInfo("IndexCreateWWWLocalQueue", "created a remove list with " + removehashes.size() + " entries for pattern '" + pattern + "'");
                            for (final byte[] b: removehashes) {
                                sb.crawlQueues.noticeURL.removeByURLHash(b);
                            }
                        }
                    } catch (final PatternSyntaxException e) {
                        Log.logException(e);
                    }
                }

                prop.put("info", "3"); // crawling queue cleared
                prop.putNum("info_numEntries", c);
            } else if (post.containsKey("deleteEntry")) {
                final String urlHash = post.get("deleteEntry");
                sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
                prop.put("LOCATION","");
                return prop;
            }
        }

        int showNum = 0, stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
        if (stackSize == 0) {
            prop.put("crawler-queue", "0");
        } else {
            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, (int) (showLimit * 1.20));

            Request urle;
            boolean dark = true;
            Seed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if ((urle != null) && (urle.url() != null)) {
                    initiator = sb.peers.getConnected(urle.initiator() == null ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
                    prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
                    prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) );
                    prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
                    prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
                    dark = !dark;
                    showNum++;
                } else {
                    stackSize--;
                }
            }
            prop.putNum("crawler-queue_list", showNum);
            prop.putNum("crawler-queue_num", stackSize); // num entries
            prop.putNum("crawler-queue_show-num", showNum); // showing show-num most recent entries
        }

        // return rewrite properties
        return prop;
    }
}

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlSwitchboard;
import de.anomic.crawler.NoticedURL.StackType;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class IndexCreateQueues_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    private static final int INVALID   = 0;
    private static final int URL       = 1;
    private static final int ANCHOR    = 2;
    private static final int PROFILE   = 3;
    private static final int DEPTH     = 4;
    private static final int INITIATOR = 5;
    private static final int MODIFIED  = 6;

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();
        StackType stackType = StackType.LOCAL;
        int urlsPerHost = 5;
        boolean embed = false;
        String deletepattern = ".*";

        if (post != null) {
            stackType = StackType.valueOf(post.get("stack", stackType.name()).toUpperCase());
            urlsPerHost = post.getInt("urlsPerHost", urlsPerHost);
            if (post.containsKey("embed")) embed = true;

            if (post.containsKey("delete")) {
                deletepattern = post.get("pattern", deletepattern).trim();
                final int option = post.getInt("option", INVALID);
                if (".*".equals(deletepattern)) {
                    sb.crawlQueues.noticeURL.clear(stackType);
                    try { sb.cleanProfiles(); } catch (final InterruptedException e) {/* ignore this */}
                } else if (option > INVALID) {
                    try {
                        // compiling the regular expression
                        final Pattern compiledPattern = Pattern.compile(deletepattern);

                        if (option == PROFILE) {
                            // search and delete the crawl profile (_much_ faster, independent of queue size)
                            // XXX: what to do about the annoying LOST PROFILE messages in the log?
                            CrawlProfile entry;
                            for (final byte[] handle: sb.crawler.getActive()) {
                                entry = sb.crawler.getActive(handle);
                                final String name = entry.name();
                                if (name.equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ||
                                    name.equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE))
                                    continue;
                                if (compiledPattern.matcher(name).find()) sb.crawler.removeActive(entry.handle().getBytes());
                            }
                        } else {
                            // iterating through the list of URLs
                            final Iterator<Request> iter = sb.crawlQueues.noticeURL.iterator(stackType);
                            Request entry;
                            final List<byte[]> removehashes = new ArrayList<byte[]>();
                            while (iter.hasNext()) {
                                if ((entry = iter.next()) == null) continue;
                                String value = null;

                                location: switch (option) {
                                    case URL:       value = (entry.url() == null) ? null : entry.url().toString(); break location;
                                    case ANCHOR:    value = entry.name(); break location;
                                    case DEPTH:     value = Integer.toString(entry.depth()); break location;
                                    case INITIATOR:
                                        value = (entry.initiator() == null || entry.initiator().length == 0) ? "proxy" : ASCII.String(entry.initiator());
                                        break location;
                                    case MODIFIED:  value = daydate(entry.appdate()); break location;
                                    default: value = null; break location;
                                }

                                if (value != null && compiledPattern.matcher(value).matches()) removehashes.add(entry.url().hash());
                            }
                            Log.logInfo("IndexCreateQueues_p", "created a remove list with " + removehashes.size() + " entries for pattern '" + deletepattern + "'");
                            for (final byte[] b: removehashes) {
                                sb.crawlQueues.noticeURL.removeByURLHash(b);
                            }
                        }
                    } catch (final PatternSyntaxException e) {
                        Log.logException(e);
                    }
                }
            }
        }

        int stackSize = sb.crawlQueues.noticeURL.stackSize(stackType);
        if (stackSize == 0) {
            prop.put("crawler", "0");
        } else {
            prop.put("crawler", "1");
            prop.put("crawler_embed", embed ? 1 : 0);
            prop.put("crawler_embed_deletepattern", deletepattern);
            prop.put("crawler_embed_queuename", stackType.name());

            final Map<String, Integer[]> hosts = sb.crawlQueues.noticeURL.getDomainStackHosts(stackType);

            int hc = 0;
            for (Map.Entry<String, Integer[]> host: hosts.entrySet()) {
                prop.putHTML("crawler_host_" + hc + "_hostname", host.getKey());
                prop.put("crawler_host_" + hc + "_embed", embed ? 1 : 0);
                prop.put("crawler_host_" + hc + "_urlsPerHost", urlsPerHost);
                prop.putHTML("crawler_host_" + hc + "_queuename", stackType.name());
                prop.put("crawler_host_" + hc + "_hostcount", host.getValue()[0]);
                prop.put("crawler_host_" + hc + "_hostdelta", host.getValue()[1]);
                List<Request> domainStackReferences = sb.crawlQueues.noticeURL.getDomainStackReferences(stackType, host.getKey(), urlsPerHost);

                Seed initiator;
                String profileHandle;
                CrawlProfile profileEntry;
                int count = 0;
                for (Request request: domainStackReferences) {
                    if (request == null) continue;
                    initiator = sb.peers.getConnected(request.initiator() == null ? "" : ASCII.String(request.initiator()));
                    profileHandle = request.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.putHTML("crawler_host_" + hc + "_list_" + count + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
                    prop.put("crawler_host_" + hc + "_list_" + count + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler_host_" + hc + "_list_" + count + "_depth", request.depth());
                    prop.put("crawler_host_" + hc + "_list_" + count + "_modified", daydate(request.appdate()) );
                    prop.putHTML("crawler_host_" + hc + "_list_" + count + "_anchor", request.name());
                    prop.put("crawler_host_" + hc + "_list_" + count + "_delta", sb.crawlQueues.noticeURL.getDomainSleepTime(stackType, sb.crawler, request));
                    prop.putHTML("crawler_host_" + hc + "_list_" + count + "_url", request.url().toNormalform(false, true));
                    prop.put("crawler_host_" + hc + "_list_" + count + "_hash", request.url().hash());
                    count++;
                }
                prop.putNum("crawler_host_" + hc + "_list", count);
                hc++;
            }
            prop.put("crawler_host", hc);
        }

        prop.put("embed", embed ? 1 : 0);
        prop.put("queuename", stackType.name().charAt(0) + stackType.name().substring(1).toLowerCase());
        prop.put("embed_queuename", stackType.name().charAt(0) + stackType.name().substring(1).toLowerCase());

        // return rewrite properties
        return prop;
    }
}
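The new servlet reads the same parameters as the form and the trash-icon link in IndexCreateQueues_p.html (stack, delete, option, pattern, urlsPerHost), so the per-host queue view and the pattern delete can also be driven from the shell. A hedged sketch using the wrapper script from the top of this commit (the name apicall.sh is an assumption; option=1 selects matching against the URL, as in the template's select box):

    # show the LOCAL crawl queue, up to 20 queued URLs per host
    ./apicall.sh "IndexCreateQueues_p.html?stack=LOCAL&urlsPerHost=20"
    # delete all queued URLs whose URL matches a regex; an empty delete= triggers the removal
    ./apicall.sh "IndexCreateQueues_p.html?stack=LOCAL&delete=&option=1&pattern=.*example.com.*"
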
@@ -1,58 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Global Crawl Queue</title>
  #%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWGlobalQueue">
  #%env/templates/header.template%#
  #%env/templates/submenuCrawlMonitor.template%#
  <h2>Global Crawl Queue</h2>
  <p>
    This queue stores the URLs that shall be sent to other peers to perform a remote crawl.
    If no peer is available for remote crawling, the links are crawled locally.
  </p>
  #(crawler-queue)#
  <p>The global crawler queue is empty</p>
  ::
  <form action="IndexCreateWWWGlobalQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
    <fieldset>
      <input type="submit" name="clearcrawlqueue" value="clear global crawl queue" />
    </fieldset>
  </form>
  <p>There are <strong>#[num]#</strong> entries in the global crawler queue. Showing <strong>#[show-num]#</strong> most recent entries.</p>
  <p>Show last <a href="IndexCreateWWWGlobalQueue_p.html?limit=50">50</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=100">100</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=250">250</a> | <a href="IndexCreateWWWGlobalQueue_p.html?limit=500">500</a> entries.</p>
  <table border="0" cellpadding="2" cellspacing="1">
    <colgroup>
      <col width="60" span="2" />
      <col width="10" />
      <col width="80" />
      <col width="180" />
      <col />
      <col width="10" />
    </colgroup>
    <tr class="TableHeader">
      <th>Initiator</th>
      <th>Profile</th>
      <th>Depth</th>
      <th>Modified Date</th>
      <th>Anchor Name</th>
      <th>URL</th>
      <th>Delete</th>
    </tr>
    #{list}#
    <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
      <td>#[initiator]#</td>
      <td>#[profile]#</td>
      <td>#[depth]#</td>
      <td>#[modified]#</td>
      <td>#[anchor]#</td>
      <td><a href="#[url]#">#[url]#</a></td>
      <td><a href="IndexCreateWWWGlobalQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
    </tr>
    #{/list}#
  </table>
  #(/crawler-queue)#
  #%env/templates/footer.template%#
</body>
</html>
@@ -1,125 +0,0 @@
// IndexCreateWWWGlobalQueue_p.java
// -------------------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
//$LastChangedDate$
//$LastChangedRevision$
//$LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// You must compile this file with
// javac -classpath .:../classes IndexCreate_p.java
// if the shell's current path is HTROOT

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;

import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class IndexCreateWWWGlobalQueue_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        final serverObjects prop = new serverObjects();

        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);

            if (post.containsKey("clearcrawlqueue")) {
                final int c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
                sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.LIMIT);
                try { sb.cleanProfiles(); } catch (final InterruptedException e) { /* ignore this */ }
                /*
                int c = 0;
                while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.StackType.LIMIT) > 0) {
                    urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.StackType.LIMIT).hash();
                    if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
                }
                */
                prop.put("info", "3"); // crawling queue cleared
                prop.putNum("info_numEntries", c);
            } else if (post.containsKey("deleteEntry")) {
                final String urlHash = post.get("deleteEntry");
                sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
                prop.put("LOCATION","");
                return prop;
            }
        }

        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);
        if (stackSize == 0) {
            prop.put("crawler-queue", "0");
        } else {
            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, showLimit);

            Request urle;
            boolean dark = true;
            Seed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
                    prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
                    prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.appdate()) );
                    prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true));
                    prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash());
                    dark = !dark;
                    showNum++;
                } else {
                    stackSize--;
                }
            }
            prop.putNum("crawler-queue_show-num", showNum); // showing show-num most recent entries
            prop.putNum("crawler-queue_num", stackSize); // num entries
            prop.putNum("crawler-queue_list", showNum);
        }

        // return rewrite properties
        return prop;
    }
}
@@ -1,69 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Local Crawl Queue</title>
  #%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWLocalQueue">
  #%env/templates/header.template%#
  #%env/templates/submenuCrawlMonitor.template%#
  <h2>Local Crawl Queue</h2>
  <p>
    This queue stores the URLs that shall be crawled locally by this peer.
    It may also contain URLs that are computed by the proxy-prefetch.
  </p>

  #(crawler-queue)#
  <p>The local crawler queue is empty</p>
  ::
  <form action="IndexCreateWWWLocalQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
    <fieldset>
      Delete Entries:
      <input type="text" name="pattern" value=".*" size="40" maxlength="200" />
      <select name="option" size="1">
        <option value="5">Initiator</option>
        <option value="3">Profile</option>
        <option value="4">Depth</option>
        <option value="6">Modified Date</option>
        <option value="2">Anchor Name</option>
        <option value="1" selected="selected">URL</option>
      </select>
      <input type="submit" name="deleteEntries" value="Delete" /><em>This may take quite a long time.</em>
    </fieldset>
  </form>
  <p>There are <strong>#[num]#</strong> entries in the local crawler queue. Showing <strong>#[show-num]#</strong> most recent entries.</p>
  <p>Show last <a href="IndexCreateWWWLocalQueue_p.html?limit=50">50</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=100">100</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=250">250</a> | <a href="IndexCreateWWWLocalQueue_p.html?limit=500">500</a> entries.</p>
  <table border="0" cellpadding="2" cellspacing="1">
    <colgroup>
      <col width="60" span="2" />
      <col width="10" />
      <col width="80" />
      <col width="180" />
      <col />
      <col width="10" />
    </colgroup>
    <tr class="TableHeader">
      <th>Initiator</th>
      <th>Profile</th>
      <th>Depth</th>
      <th>Modified Date</th>
      <th>Anchor Name</th>
      <th>URL</th>
      <th>Delete</th>
    </tr>
    #{list}#
    <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
      <td>#[initiator]#</td>
      <td>#[profile]#</td>
      <td>#[depth]#</td>
      <td>#[modified]#</td>
      <td>#[anchor]#</td>
      <td><a href="#[url]#">#[url]#</a></td>
      <td><a href="IndexCreateWWWLocalQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
    </tr>
    #{/list}#
  </table>
  #(/crawler-queue)#
  #%env/templates/footer.template%#
</body>
</html>
@@ -1,65 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Remote Crawl Queue</title>
  #%env/templates/metas.template%#
</head>
<body id="IndexCreateWWWGlobalQueue">
  #%env/templates/header.template%#
  #%env/templates/submenuCrawlMonitor.template%#
  <h2>Remote Crawl Queue</h2>
  <p>
    This queue stores the URLs that other peers sent to you in order to perform a remote crawl for them.
  </p>
  #(crawler-queue)#
  <p>The remote crawler queue is empty</p>
  ::
  <form action="IndexCreateWWWRemoteQueue_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
    <fieldset>
      <input type="submit" name="clearcrawlqueue" value="clear remote crawl queue" />
    </fieldset>
  </form>
  <p>
    There are <strong>#[num]#</strong> entries in the remote crawler queue.
    Showing <strong>#[show-num]#</strong> most recent entries.
  </p>
  <p>
    Show last <a href="IndexCreateWWWRemoteQueue_p.html?limit=50">50</a> |
    <a href="IndexCreateWWWRemoteQueue_p.html?limit=100">100</a> |
    <a href="IndexCreateWWWRemoteQueue_p.html?limit=250">250</a> |
    <a href="IndexCreateWWWRemoteQueue_p.html?limit=500">500</a> entries.
  </p>
  <table border="0" cellpadding="2" cellspacing="1">
    <colgroup>
      <col width="60" span="2" />
      <col width="10" />
      <col width="80" />
      <col width="180" />
      <col />
      <col width="10" />
    </colgroup>
    <tr class="TableHeader">
      <th>Initiator</th>
      <th>Profile</th>
      <th>Depth</th>
      <th>Modified Date</th>
      <th>Anchor Name</th>
      <th>URL</th>
      <th>Delete</th>
    </tr>
    #{list}#
    <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
      <td>#[initiator]#</td>
      <td>#[profile]#</td>
      <td>#[depth]#</td>
      <td>#[modified]#</td>
      <td>#[anchor]#</td>
      <td><a href="#[url]#">#[url]#</a></td>
      <td><a href="IndexCreateWWWRemoteQueue_p.html?deleteEntry=#[hash]#">[Delete]</a></td>
    </tr>
    #{/list}#
  </table>
  #(/crawler-queue)#
  #%env/templates/footer.template%#
</body>
</html>
@@ -1,120 +0,0 @@
// IndexCreateWWWRemoteQueue_p.java
// -------------------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
// last major change: 04.07.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// You must compile this file with
// javac -classpath .:../classes IndexCreateWWWRemoteQueue_p.java
// if the shell's current path is HTROOT

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;

import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;

public class IndexCreateWWWRemoteQueue_p {

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final servletProperties prop = new servletProperties();
        final Switchboard sb = (Switchboard) env;

        int showLimit = 100;
        if (post != null) {
            showLimit = post.getInt("limit", 100);

            if (post.containsKey("clearcrawlqueue")) {
                final int c = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.REMOTE);
                sb.crawlQueues.noticeURL.clear(NoticedURL.StackType.REMOTE);
                try { sb.cleanProfiles(); } catch (final InterruptedException e) { /* ignore this */ }
                /*
                int c = 0;
                while (switchboard.urlPool.noticeURL.stackSize(plasmaCrawlNURL.StackType.LIMIT) > 0) {
                    urlHash = switchboard.urlPool.noticeURL.pop(plasmaCrawlNURL.StackType.LIMIT).hash();
                    if (urlHash != null) { switchboard.urlPool.noticeURL.remove(urlHash); c++; }
                }
                */
                prop.put("info", "3"); // crawling queue cleared
                prop.putNum("info_numEntries", c);
            } else if (post.containsKey("deleteEntry")) {
                final String urlHash = post.get("deleteEntry");
                sb.crawlQueues.noticeURL.removeByURLHash(urlHash.getBytes());
                prop.put("LOCATION","");
                return prop;
            }
        }

        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.REMOTE);
        if (stackSize == 0) {
            prop.put("crawler-queue", "0");
        } else {
            prop.put("crawler-queue", "1");
            final List<Request> crawlerList = sb.crawlQueues.noticeURL.top(NoticedURL.StackType.REMOTE, showLimit);

            Request urle;
            boolean dark = true;
            Seed initiator;
            String profileHandle;
            CrawlProfile profileEntry;
            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_" + showNum + "_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                    prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth());
                    prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.appdate()) );
                    prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name());
                    prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString());
                    prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash());
                    dark = !dark;
                    showNum++;
                } else {
                    stackSize--;
                }
            }
            prop.putNum("crawler-queue_show-num", showNum); // showing show-num most recent entries
            prop.putNum("crawler-queue_num", stackSize); // num entries
            prop.putNum("crawler-queue_list", showNum);
        }
        return prop;
    }
}
@@ -1,124 +0,0 @@
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import de.anomic.crawler.NoticedURL;
import de.anomic.crawler.retrieval.Request;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

public class queues_p {

    public static final String STATE_RUNNING = "running";
    public static final String STATE_PAUSED = "paused";

    private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
    private static String daydate(final Date date) {
        if (date == null) return "";
        return dayFormatter.format(date);
    }

    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
        final Switchboard sb = (Switchboard) env;
        //wikiCode wikiTransformer = new wikiCode(switchboard);
        final serverObjects prop = new serverObjects();
        Segment segment = null;
        final boolean html = post != null && post.containsKey("html");
        prop.setLocalized(html);
        if (post != null && post.containsKey("segment") && sb.verifyAuthentication(header)) {
            segment = sb.indexSegments.segment(post.get("segment"));
        }
        if (segment == null) segment = sb.indexSegments.segment(Segments.Process.PUBLIC);
        prop.put("rejected", "0");
        //int showRejectedCount = 10;

        Seed initiator;

        // index size
        prop.putNum("urlpublictextSize", segment.urlMetadata().size());
        prop.putNum("rwipublictextSize", segment.termIndex().sizesMax());

        // loader queue
        prop.putNum("loaderSize", sb.crawlQueues.workerSize());
        prop.putNum("loaderMax", sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10));
        if (sb.crawlQueues.workerSize() == 0) {
            prop.put("list-loader", "0");
        } else {
            final Request[] w = sb.crawlQueues.activeWorkerEntries();
            int count = 0;
            for (final Request r : w) {
                if (r == null) continue;
                prop.put("list-loader_"+count+"_profile", r.profileHandle());
                initiator = sb.peers.getConnected((r.initiator() == null) ? "" : ASCII.String(r.initiator()));
                prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                prop.put("list-loader_"+count+"_depth", r.depth());
                prop.putXML("list-loader_"+count+"_url", r.url().toString());
                count++;
            }
            prop.put("list-loader", count);
        }

        // local crawl queue
        prop.putNum("localCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
        prop.put("localCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
        int stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.CORE);
        addNTable(sb, prop, "list-local", sb.crawlQueues.noticeURL.top(NoticedURL.StackType.CORE, Math.min(10, stackSize)));

        // global crawl queue
        prop.putNum("limitCrawlSize", sb.crawlQueues.limitCrawlJobSize());
        prop.put("limitCrawlState", STATE_RUNNING);
        stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);

        // remote crawl queue
        prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
        prop.put("remoteCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
        stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.LIMIT);

        if (stackSize == 0) {
            prop.put("list-remote", "0");
        } else {
            addNTable(sb, prop, "list-remote", sb.crawlQueues.noticeURL.top(NoticedURL.StackType.LIMIT, Math.min(10, stackSize)));
        }

        // noload crawl queue
        prop.putNum("noloadCrawlSize", sb.crawlQueues.noloadCrawlJobSize());
        prop.put("noloadCrawlState", STATE_RUNNING);
        //stackSize = sb.crawlQueues.noticeURL.stackSize(NoticedURL.StackType.NOLOAD);

        // return rewrite properties
        return prop;
    }

    public static final void addNTable(final Switchboard sb, final serverObjects prop, final String tableName, final List<Request> crawlerList) {

        int showNum = 0;
        Seed initiator;
        for (final Request urle : crawlerList) {
            if ((urle != null) && (urle.url() != null)) {
                initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : UTF8.String(urle.initiator()));
                prop.put(tableName + "_" + showNum + "_profile", urle.profileHandle());
                prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
                prop.put(tableName + "_" + showNum + "_depth", urle.depth());
                prop.put(tableName + "_" + showNum + "_modified", daydate(urle.appdate()));
                prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
                prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
                prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
                showNum++;
            }
        }
        prop.put(tableName, showNum);
    }
}
@@ -1,71 +0,0 @@
<?xml version="1.0"?>
<queues>
  <dbsize>
    <urlpublictext>#[urlpublictextSize]#</urlpublictext>
    <rwipublictext>#[rwipublictextSize]#</rwipublictext>
  </dbsize>
  <loaderqueue>
    <size>#[loaderSize]#</size>
    <max>#[loaderMax]#</max>
    #{list-loader}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <url>#[url]#</url>
    </entry>
    #{/list-loader}#
  </loaderqueue>
  <localcrawlerqueue>
    <size>#[localCrawlSize]#</size>
    <state>#[localCrawlState]#</state>
    #{list-local}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-local}#
  </localcrawlerqueue>
  <limitcrawlerqueue>
    <size>#[limitCrawlSize]#</size>
    <state>#[limitCrawlState]#</state>
    #{list-limit}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-limit}#
  </limitcrawlerqueue>
  <remotecrawlerqueue>
    <size>#[remoteCrawlSize]#</size>
    <state>#[remoteCrawlState]#</state>
    #{list-remote}#
    <entry>
      <profile>#[profile]#</profile>
      <initiator>#[initiator]#</initiator>
      <depth>#[depth]#</depth>
      <modified>#[modified]#</modified>
      <anchor>#[anchor]#</anchor>
      <url>#[url]#</url>
      <hash>#[hash]#</hash>
      <inProcess>#(inProcess)#false::true#(/inProcess)#</inProcess>
    </entry>
    #{/list-remote}#
  </remotecrawlerqueue>
  <noloadcrawlerqueue>
    <size>#[noloadCrawlSize]#</size>
    <state>#[noloadCrawlState]#</state>
  </noloadcrawlerqueue>
</queues>
@@ -1,35 +1,52 @@
<?xml version="1.0"?>
<status>
  <ppm>#[ppm]#</ppm>

  <wordCacheSize>#[wordCacheSize]#</wordCacheSize>
  <wordCacheMaxSize>#[wordCacheMaxSize]#</wordCacheMaxSize>

  <memory>
    <free>#[freeMemory]#</free>
    <total>#[totalMemory]#</total>
    <max>#[maxMemory]#</max>
  </memory>

  <processors>#[processors]#</processors>

  <traffic>
    <in>#[trafficIn]#</in>
    <proxy>#[trafficProxy]#</proxy>
    <crawler>#[trafficCrawler]#</crawler>
  </traffic>

  <dbsize>
    <urlpublictext>#[urlpublictextSize]#</urlpublictext>
    <rwipublictext>#[rwipublictextSize]#</rwipublictext>
  </dbsize>

  <loaderqueue>
    <size>#[loaderSize]#</size>
    <max>#[loaderMax]#</max>
  </loaderqueue>

  <localcrawlerqueue>
    <size>#[localCrawlSize]#</size>
    <state>#[localCrawlState]#</state>
  </localcrawlerqueue>

  <limitcrawlerqueue>
    <size>#[limitCrawlSize]#</size>
    <state>#[limitCrawlState]#</state>
  </limitcrawlerqueue>

  <remotecrawlerqueue>
    <size>#[remoteCrawlSize]#</size>
    <state>#[remoteCrawlState]#</state>
  </remotecrawlerqueue>

  <noloadcrawlerqueue>
    <size>#[noloadCrawlSize]#</size>
    <state>#[noloadCrawlState]#</state>
  </noloadcrawlerqueue>

  <memory>
    <free>#[freeMemory]#</free>
    <total>#[totalMemory]#</total>
    <max>#[maxMemory]#</max>
  </memory>
  <processors>#[processors]#</processors>
  <traffic>
    <in>#[trafficIn]#</in>
    <proxy>#[trafficProxy]#</proxy>
    <crawler>#[trafficCrawler]#</crawler>
  </traffic>

</status>
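With this change the status XML carries the sizes and states of all crawler queues, so a monitoring script no longer needs the removed per-entry queues_p.xml listing for simple counters. A minimal polling sketch, assuming the page is served as api/status_p.xml (the exact servlet path is not shown in this diff) and that xmllint from libxml2 is installed:

    # fetch the status document and pull out two counters
    ./apicall.sh "api/status_p.xml" > status.xml
    xmllint --xpath 'string(/status/localcrawlerqueue/size)' status.xml   # queued local crawl URLs
    xmllint --xpath 'string(/status/ppm)' status.xml                      # indexing speed in pages per minute
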
[image diff: new image file added in this commit, 932 B]
@@ -0,0 +1,174 @@
/**
 *  MapColumnIndex
 *  Copyright 2012 by Michael Christen
 *  First released 01.02.2012 at http://yacy.net
 *
 * $LastChangedDate$
 * $LastChangedRevision$
 * $LastChangedBy$
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.kelondro.blob;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import net.yacy.cora.document.ASCII;
import net.yacy.kelondro.order.NaturalOrder;

/**
 * a mapping from column names to index maps; each index map takes a value of that column
 * to the collection of primary keys of the table rows in which the value occurs
 */
public class MapColumnIndex {

    private static final long serialVersionUID = -424741536889467566L;

    private final Map<String, Map<String, Collection<byte[]>>> index;

    public MapColumnIndex() {
        this.index = new HashMap<String, Map<String, Collection<byte[]>>>();
    }

    public synchronized Collection<byte[]> getIndex(final String whereKey, final String isValue) throws UnsupportedOperationException {
        Map<String, Collection<byte[]>> references = this.index.get(whereKey);
        if (references == null) throw new UnsupportedOperationException();
        Collection<byte[]> indexes = references.get(isValue);
        if (indexes == null) return new ArrayList<byte[]>(0); // empty collection
        return indexes;
    }

    public synchronized void clear() {
        this.index.clear();
    }

    /**
     * create a full index for the whereKey
     * @param whereKey
     * @param isValue
     * @param table
     */
    public synchronized void init(final String whereKey, final String isValue, final Iterator<Map.Entry<byte[], Map<String, String>>> table) {
        Map<String, Collection<byte[]>> valueIdxMap = new HashMap<String, Collection<byte[]>>();
        this.index.put(whereKey, valueIdxMap);
        Map.Entry<byte[], Map<String, String>> line;
        while (table.hasNext()) {
            line = table.next();
            String value = line.getValue().get(whereKey);
            if (value == null) continue; // we don't need to remember that
            indexupdate(line.getKey(), valueIdxMap, value);
        }
    }

    /**
     * update an index entry
     * @param primarykey the primary key for the row that is updated
     * @param row the row that was updated (a mapping from column names to values)
     */
    public synchronized void update(final byte[] primarykey, final Map<String, String> row) {
        for (Map.Entry<String, Map<String, Collection<byte[]>>> entry: this.index.entrySet()) {
            // create an index for all columns that we track
            String value = row.get(entry.getKey());
            if (value == null) continue; // we don't need to remember that
            indexupdate(primarykey, entry.getValue(), value);
        }
    }

    private void indexupdate(final byte[] primarykey, final Map<String, Collection<byte[]>> valueIdxMap, final String value) {
        Collection<byte[]> indexes = valueIdxMap.get(value);
        if (indexes == null) {
            // create a new index entry
            indexes = new ArrayList<byte[]>(1);
            indexes.add(primarykey);
            valueIdxMap.put(value, indexes);
        } else {
            // update the existing index entry:
            // check if the primary key is already recorded for this value
            if (!net.yacy.kelondro.util.ByteBuffer.contains(indexes, primarykey)) {
                indexes.add(primarykey);
            }
        }
    }

    /**
     * delete all references to the primary key
     * @param primarykey
     */
    public synchronized void delete(final byte[] primarykey) {
        for (Map.Entry<String, Map<String, Collection<byte[]>>> entry: this.index.entrySet()) {
            // we must check all index reference maps: iterate over entries
            indexdelete(primarykey, entry.getValue());
        }
    }

    private void indexdelete(final byte[] index, final Map<String, Collection<byte[]>> valueIdxMap) {
        Iterator<Map.Entry<String, Collection<byte[]>>> i = valueIdxMap.entrySet().iterator();
        Map.Entry<String, Collection<byte[]>> ref;
        while (i.hasNext()) {
            ref = i.next();
            net.yacy.kelondro.util.ByteBuffer.remove(ref.getValue(), index);
            if (ref.getValue().isEmpty()) {
                i.remove();
            }
        }
    }

    private static Collection<byte[]> getIndexWithExceptionHandler(final MapColumnIndex idx, final String whereKey, final String isValue, Map<byte[], Map<String, String>> table) {
        try {
            return idx.getIndex(whereKey, isValue);
        } catch (UnsupportedOperationException e) {
            // no index exists for this column yet: build one from the table, then retry
            idx.init(whereKey, isValue, table.entrySet().iterator());
            try {
                return idx.getIndex(whereKey, isValue);
            } catch (UnsupportedOperationException ee) {
                throw ee;
            }
        }
    }

    private static void printIndex(Collection<byte[]> index) {
        System.out.print("idx{");
        int c = 0;
        for (byte[] a: index) {
            if (c++ != 0) System.out.print(", ");
            System.out.print(ASCII.String(a));
        }
        System.out.print("}");
    }

    public static void main(String[] args) {
        Map<byte[], Map<String, String>> table = new TreeMap<byte[], Map<String, String>>(NaturalOrder.naturalOrder);
        Map<String, String> row;
        row = new HashMap<String, String>(); row.put("a", "1"); row.put("b", "2"); row.put("c", "2"); table.put("line1".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "3"); row.put("b", "2"); row.put("c", "4"); table.put("line2".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "5"); row.put("b", "2"); row.put("c", "4"); table.put("line3".getBytes(), row);
        row = new HashMap<String, String>(); row.put("a", "6"); row.put("b", "7"); row.put("c", "8"); table.put("line4".getBytes(), row);
        MapColumnIndex idx = new MapColumnIndex();
        System.out.print("column b, value 2: "); printIndex(getIndexWithExceptionHandler(idx, "b", "2", table)); System.out.println();
        System.out.print("column c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println();
        System.out.print("column b, value 7: "); printIndex(getIndexWithExceptionHandler(idx, "b", "7", table)); System.out.println();
        System.out.print("column d, value 0: "); printIndex(getIndexWithExceptionHandler(idx, "d", "0", table)); System.out.println();
        row = new HashMap<String, String>(); row.put("a", "9"); row.put("b", "9"); row.put("c", "4"); table.put("line5".getBytes(), row);
        idx.update("line5".getBytes(), row);
        System.out.print("column c, value 4: "); printIndex(getIndexWithExceptionHandler(idx, "c", "4", table)); System.out.println();
    }

}
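MapColumnIndex ships with a small self-test in its main method: it builds a four-row demo table, queries three column indexes (which are built lazily on the first UnsupportedOperationException), then adds a fifth row through update and queries again. A hedged sketch of running it once the project is compiled; the classes output directory is an assumption about the build layout, and the expected line follows from the demo table above:

    # run the demo, assuming the YaCy sources have already been compiled into ./classes
    java -cp classes net.yacy.kelondro.blob.MapColumnIndex
    # expected first output line: column b, value 2: idx{line1, line2, line3}
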