Add autocrawl settings page

pull/40/head
Ryszard Goń 9 years ago
parent a98c395023
commit 7a7a1277bd

@ -0,0 +1,45 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
<head>
<title>YaCy '#[clientname]#': Crawl Start</title>
#%env/templates/metas.template%#
</head>
<body id="Autocrawl">
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
<h2>Autocrawler</h2>
Autocrawler automatically selects and adds tasks to the local crawl queue. This will work best when there are already quite a few domains in the index.
<fieldset>
<legend>Autocrawler Configuration</legend>
<!-- Settings form: posts back to this same page. The min/max attributes below
     are only browser-side hints; the servlet clamps every numeric value again. -->
<form id="ConfigForm" method="post" action="Autocrawl_p.html" enctype="multipart/form-data" accept-charset="UTF-8">
<dl>
#(changed)#::<dt></dt><dd><span class="error">You need to restart for some settings to be applied</span></dd>#(/changed)#
<dt>Enable Autocrawler:</dt>
<dd><input id="autocrawlEnable" name="autocrawlEnable" type="checkbox" #(autocrawlEnable)#::checked="checked"#(/autocrawlEnable)# /></dd>
<dt>Deep crawl every:</dt>
<dd>
<input id="autocrawlRatio" name="autocrawlRatio" type="number" min="1" max="500" step="1" size="3" maxlength="3" value="#[autocrawlRatio]#" />
Warning: if this is bigger than "Rows to fetch" only shallow crawls will run.
</dd>
<dt>Rows to fetch at once:</dt>
<dd><input id="autocrawlRows" name="autocrawlRows" type="number" min="1" max="500" step="1" size="3" maxlength="3" value="#[autocrawlRows]#" /></dd>
<dt>Recrawl only older than # days:</dt>
<!-- Upper bound matches the limit of 60 days enforced by the servlet. -->
<dd><input id="autocrawlDays" name="autocrawlDays" type="number" min="1" max="60" step="1" size="3" maxlength="3" value="#[autocrawlDays]#" /></dd>
<dt>Get hosts by query:</dt>
<dd>
<input id="autocrawlQuery" name="autocrawlQuery" type="text" size="50" value="#[autocrawlQuery]#" />
Can be any valid Solr query.
</dd>
<dt>Shallow crawl depth (0 to 2):</dt>
<dd><input id="autocrawlShallow" name="autocrawlShallow" type="number" min="0" max="2" step="1" size="1" maxlength="1" value="#[autocrawlShallow]#" /></dd>
<dt>Deep crawl depth (1 to 5):</dt>
<dd><input id="autocrawlDeep" name="autocrawlDeep" type="number" min="1" max="5" step="1" size="1" maxlength="1" value="#[autocrawlDeep]#" /></dd>
<dt><input type="submit" name="save" class="btn btn-primary" value="Save" /></dt>
</dl>
</form>
</fieldset>
</body>
</html>

@ -0,0 +1,94 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.data.CrawlProfile.CrawlAttribute;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet backing the {@code Autocrawl_p.html} settings page.
 *
 * <p>Reads the autocrawl settings from the switchboard configuration, overlays any
 * values submitted with the form, clamps the numeric values to their allowed ranges,
 * stores them back when a form was posted, and exposes the resulting values to the
 * page template.
 */
public class Autocrawl_p {

    /**
     * Handles a request for the autocrawl settings page.
     *
     * @param header request header (not used)
     * @param post   submitted form fields, or {@code null} when the page is only displayed
     * @param env    the server environment; cast to {@link Switchboard}
     * @return the template properties: {@code autocrawlEnable}, {@code autocrawlRatio},
     *         {@code autocrawlRows}, {@code autocrawlDays}, {@code autocrawlQuery},
     *         {@code autocrawlShallow}, {@code autocrawlDeep}, plus {@code changed}
     *         when a form was posted
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        // Start from the stored configuration (or the built-in defaults).
        boolean autocrawlEnable = sb.getConfigBool(SwitchboardConstants.AUTOCRAWL, false);
        int autocrawlRatio = configInt(sb, SwitchboardConstants.AUTOCRAWL_RATIO, 50);
        int autocrawlRows = configInt(sb, SwitchboardConstants.AUTOCRAWL_ROWS, 100);
        int autocrawlDays = configInt(sb, SwitchboardConstants.AUTOCRAWL_DAYS, 1);
        String autocrawlQuery = sb.getConfig(SwitchboardConstants.AUTOCRAWL_QUERY, "*:*");
        int autocrawlShallow = configInt(sb, SwitchboardConstants.AUTOCRAWL_SHALLOW_DEPTH, 1);
        int autocrawlDeep = configInt(sb, SwitchboardConstants.AUTOCRAWL_DEEP_DEPTH, 3);

        // Overlay whatever the form submitted.
        if (post != null) {
            autocrawlEnable = post.getBoolean("autocrawlEnable");
            if (post.containsKey("autocrawlRatio")) {
                autocrawlRatio = post.getInt("autocrawlRatio", 50);
            }
            if (post.containsKey("autocrawlRows")) {
                // The key must match the form field name; a misspelled key here made
                // every save fall back to the default of 100.
                autocrawlRows = post.getInt("autocrawlRows", 100);
            }
            if (post.containsKey("autocrawlDays")) {
                autocrawlDays = post.getInt("autocrawlDays", 1);
            }
            if (post.containsKey("autocrawlQuery")) {
                autocrawlQuery = post.get("autocrawlQuery", "*:*");
            }
            if (post.containsKey("autocrawlShallow")) {
                autocrawlShallow = post.getInt("autocrawlShallow", 1);
            }
            if (post.containsKey("autocrawlDeep")) {
                autocrawlDeep = post.getInt("autocrawlDeep", 3);
            }
        }

        // Enforce the allowed ranges regardless of where the values came from.
        autocrawlRatio = clamp(autocrawlRatio, 1, 500);
        autocrawlRows = clamp(autocrawlRows, 1, 500);
        // NOTE(review): 60 is the limit enforced here; keep the "max" attribute of the
        // days field in the page template in sync with it.
        autocrawlDays = clamp(autocrawlDays, 1, 60);
        autocrawlShallow = clamp(autocrawlShallow, 0, 2);
        autocrawlDeep = clamp(autocrawlDeep, 1, 5);

        // Persist only when a form was actually posted.
        if (post != null) {
            sb.setConfig(SwitchboardConstants.AUTOCRAWL, autocrawlEnable);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_RATIO, autocrawlRatio);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_ROWS, autocrawlRows);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_DAYS, autocrawlDays);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_QUERY, autocrawlQuery);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_SHALLOW_DEPTH, autocrawlShallow);
            sb.setConfig(SwitchboardConstants.AUTOCRAWL_DEEP_DEPTH, autocrawlDeep);
            sb.initAutocrawl(autocrawlEnable);
            prop.put("changed", true);
        }

        prop.put("autocrawlEnable", autocrawlEnable);
        prop.put("autocrawlRatio", autocrawlRatio);
        prop.put("autocrawlRows", autocrawlRows);
        prop.put("autocrawlDays", autocrawlDays);
        prop.put("autocrawlQuery", autocrawlQuery);
        prop.put("autocrawlShallow", autocrawlShallow);
        prop.put("autocrawlDeep", autocrawlDeep);

        return prop;
    }

    /**
     * Reads an integer configuration value, returning {@code fallback} when the
     * stored string cannot be parsed as an integer.
     */
    private static int configInt(final Switchboard sb, final String key, final int fallback) {
        try {
            return Integer.parseInt(sb.getConfig(key, Integer.toString(fallback)));
        } catch (final NumberFormatException e) {
            // A malformed stored value must not break the settings page.
            return fallback;
        }
    }

    /** Restricts {@code value} to the inclusive range {@code [min, max]}. */
    private static int clamp(final int value, final int min, final int max) {
        return Math.max(min, Math.min(max, value));
    }
}

@ -17,6 +17,7 @@
<li><a href="CrawlStartScanner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Network Scanner</a></li>
<li><a href="RemoteCrawl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Remote Crawling</a></li>
<li><a href="ProxyIndexingMonitor_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Scraping Proxy</a></li>
<li><a href="Autocrawl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Autocrawl</a></li>
</ul>
</div>

Loading…
Cancel
Save