parent
a98c395023
commit
7a7a1277bd
@ -0,0 +1,45 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
<head>
  <title>YaCy '#[clientname]#': Crawl Start</title>
  #%env/templates/metas.template%#
</head>
<body id="Autocrawl">
  #%env/templates/header.template%#
  #%env/templates/submenuIndexCreate.template%#

  <h2>Autocrawler</h2>
  <p>Autocrawler automatically selects and adds tasks to the local crawl queue. This will work best when there are already quite a few domains in the index.</p>

  <fieldset>
    <legend>Autocrawler Configuration</legend>

    <!-- Posts back to this same page; the servlet clamps every numeric value server-side as well. -->
    <form id="ConfigForm" method="post" action="Autocrawl_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
      <dl>
        <!-- Restart notice: the second alternative is selected once the form has been submitted. -->
        #(changed)#::<dt></dt><dd><span class="error">You need to restart for some settings to be applied</span></dd>#(/changed)#
        <dt>Enable Autocrawler:</dt>
        <dd><input id="autocrawlEnable" name="autocrawlEnable" type="checkbox" #(autocrawlEnable)#::checked="checked"#(/autocrawlEnable)# /></dd>
        <dt>Deep crawl every:</dt>
        <dd>
          <input id="autocrawlRatio" name="autocrawlRatio" type="number" min="1" max="500" step="1" size="3" maxlength="3" value="#[autocrawlRatio]#" />
          Warning: if this is bigger than "Rows to fetch" only shallow crawls will run.
        </dd>
        <dt>Rows to fetch at once:</dt>
        <dd><input id="autocrawlRows" name="autocrawlRows" type="number" min="1" max="500" step="1" size="3" maxlength="3" value="#[autocrawlRows]#" /></dd>
        <dt>Recrawl only older than # days:</dt>
        <dd><input id="autocrawlDays" name="autocrawlDays" type="number" min="1" max="360" step="1" size="3" maxlength="3" value="#[autocrawlDays]#" /></dd>
        <dt>Get hosts by query:</dt>
        <dd>
          <input id="autocrawlQuery" name="autocrawlQuery" type="text" size="50" value="#[autocrawlQuery]#" />
          Can be any valid Solr query.
        </dd>
        <dt>Shallow crawl depth (0 to 2):</dt>
        <dd><input id="autocrawlShallow" name="autocrawlShallow" type="number" min="0" max="2" step="1" size="1" maxlength="1" value="#[autocrawlShallow]#" /></dd>
        <dt>Deep crawl depth (1 to 5):</dt>
        <dd><input id="autocrawlDeep" name="autocrawlDeep" type="number" min="1" max="5" step="1" size="1" maxlength="1" value="#[autocrawlDeep]#" /></dd>
        <dt><input type="submit" name="save" class="btn btn-primary" value="Save" /></dt>
      </dl>
    </form>
  </fieldset>
</body>
</html>
|
@ -0,0 +1,94 @@
|
|||||||
|
import net.yacy.cora.protocol.RequestHeader;
|
||||||
|
import net.yacy.crawler.data.CrawlProfile.CrawlAttribute;
|
||||||
|
import net.yacy.search.Switchboard;
|
||||||
|
import net.yacy.search.SwitchboardConstants;
|
||||||
|
import net.yacy.server.serverObjects;
|
||||||
|
import net.yacy.server.serverSwitch;
|
||||||
|
|
||||||
|
public class Autocrawl_p {
|
||||||
|
|
||||||
|
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||||
|
final serverObjects prop = new serverObjects();
|
||||||
|
final Switchboard sb = (Switchboard) env;
|
||||||
|
|
||||||
|
boolean autocrawlEnable = sb.getConfigBool(SwitchboardConstants.AUTOCRAWL, false);
|
||||||
|
int autocrawlRatio = Integer.parseInt(sb.getConfig(SwitchboardConstants.AUTOCRAWL_RATIO, "50"));
|
||||||
|
int autocrawlRows = Integer.parseInt(sb.getConfig(SwitchboardConstants.AUTOCRAWL_ROWS, "100"));
|
||||||
|
int autocrawlDays = Integer.parseInt(sb.getConfig(SwitchboardConstants.AUTOCRAWL_DAYS, "1"));
|
||||||
|
String autocrawlQuery = sb.getConfig(SwitchboardConstants.AUTOCRAWL_QUERY, "*:*");
|
||||||
|
int autocrawlShallow = Integer.parseInt(sb.getConfig(SwitchboardConstants.AUTOCRAWL_SHALLOW_DEPTH, "1"));
|
||||||
|
int autocrawlDeep = Integer.parseInt(sb.getConfig(SwitchboardConstants.AUTOCRAWL_DEEP_DEPTH, "3"));
|
||||||
|
|
||||||
|
if (post != null) {
|
||||||
|
autocrawlEnable = post.getBoolean("autocrawlEnable");
|
||||||
|
if (post.containsKey("autocrawlRatio")) {
|
||||||
|
autocrawlRatio = post.getInt("autocrawlRatio", 50);
|
||||||
|
}
|
||||||
|
if (post.containsKey("autocrawlRows")) {
|
||||||
|
autocrawlRows = post.getInt("autocralwRows", 100);
|
||||||
|
}
|
||||||
|
if (post.containsKey("autocrawlDays")) {
|
||||||
|
autocrawlDays = post.getInt("autocrawlDays", 1);
|
||||||
|
}
|
||||||
|
if (post.containsKey("autocrawlQuery")) {
|
||||||
|
autocrawlQuery = post.get("autocrawlQuery", "*:*");
|
||||||
|
}
|
||||||
|
if (post.containsKey("autocrawlShallow")){
|
||||||
|
autocrawlShallow = post.getInt("autocrawlShallow", 1);
|
||||||
|
}
|
||||||
|
if (post.containsKey("autocrawlDeep")) {
|
||||||
|
autocrawlDeep = post.getInt("autocrawlDeep", 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (autocrawlRatio > 500) {
|
||||||
|
autocrawlRatio = 500;
|
||||||
|
} else if (autocrawlRatio < 1) {
|
||||||
|
autocrawlRatio = 1;
|
||||||
|
}
|
||||||
|
if (autocrawlRows > 500) {
|
||||||
|
autocrawlRows = 500;
|
||||||
|
} else if (autocrawlRows < 1) {
|
||||||
|
autocrawlRows = 1;
|
||||||
|
}
|
||||||
|
if (autocrawlDays > 60) {
|
||||||
|
autocrawlDays = 60;
|
||||||
|
} else if (autocrawlDays < 1) {
|
||||||
|
autocrawlDays = 1;
|
||||||
|
}
|
||||||
|
if (autocrawlShallow > 1) {
|
||||||
|
autocrawlShallow = 2;
|
||||||
|
} else if (autocrawlShallow < 0) {
|
||||||
|
autocrawlShallow = 0;
|
||||||
|
}
|
||||||
|
if (autocrawlDeep > 5) {
|
||||||
|
autocrawlDeep = 5;
|
||||||
|
} else if (autocrawlDeep < 1) {
|
||||||
|
autocrawlDeep = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (post != null) {
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL, autocrawlEnable);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_RATIO, autocrawlRatio);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_ROWS, autocrawlRows);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_DAYS, autocrawlDays);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_QUERY, autocrawlQuery);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_SHALLOW_DEPTH, autocrawlShallow);
|
||||||
|
sb.setConfig(SwitchboardConstants.AUTOCRAWL_DEEP_DEPTH, autocrawlDeep);
|
||||||
|
|
||||||
|
sb.initAutocrawl(autocrawlEnable);
|
||||||
|
|
||||||
|
prop.put("changed", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
prop.put("autocrawlEnable", autocrawlEnable);
|
||||||
|
prop.put("autocrawlRatio", autocrawlRatio);
|
||||||
|
prop.put("autocrawlRows", autocrawlRows);
|
||||||
|
prop.put("autocrawlDays", autocrawlDays);
|
||||||
|
prop.put("autocrawlQuery", autocrawlQuery);
|
||||||
|
prop.put("autocrawlShallow", autocrawlShallow);
|
||||||
|
prop.put("autocrawlDeep", autocrawlDeep);
|
||||||
|
|
||||||
|
return prop;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in new issue