redesigned CrawlStartScanner user interface and added more features:

- multiple hosts for environment scans can be given (comma-separated)
- each service (ftp, smb, http, https) for the scan can be selected
- the scan result can be accumulated or refreshed each time a network scan is made
- a scheduler was added to repeat a scan and add all found urls to the indexer automatically

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7378 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 6f4f957e50
commit c288fcf634

@ -13,11 +13,6 @@
selectForm.elements["item_" + i].checked = !selectForm.elements["item_" + i].checked;
}
}
function disableSubmit() {
document.getElementById('scanipSubmit').disabled=true;
document.getElementById('scanhostSubmit').disabled=true;
document.getElementById('scanintranetSubmit').disabled=true;
}
-->
</script>
<script type="text/javascript" src="/js/sorttable.js"></script>
@ -27,13 +22,11 @@
#%env/templates/submenuIndexCreate.template%#
<h2>Network Scanner</h2>
#(selectiprange)#::
<p>
YaCy can scan a network segment for available http, ftp and smb servers.
You must first select an IP range and then, after this range is scanned,
it is possible to select servers that had been found for a full-site crawl.
</p>
#(/selectiprange)#
#(noserverdetected)#::
<p>
@ -41,32 +34,59 @@
</p>
#(/noserverdetected)#
#(enterrange)#::
<div><table border="0"><tr valign="top">
<td width="270"><form onSubmit="document.getElementById('scanipinfo').innerHTML='Please wait...'; disableSubmit();" action="CrawlStartScanner_p.html" method="get">
<fieldset height="30">
<legend><label for="servertable">Scan with given IP range</label></legend><br>
<input type="text" name="ip4-0" value="#[ip4-0]#" size="3" maxlength="3" />.
<input type="text" name="ip4-1" value="#[ip4-1]#" size="3" maxlength="3" />.
<input type="text" name="ip4-2" value="#[ip4-2]#" size="3" maxlength="3" />.[1-254]
<input type="hidden" name="scanip" value=""/>
<input type="submit" id="scanipSubmit" name="scanipSubmit" value="Scan"/><br><br><div class="info" id="scanipinfo" style="text-decoration: blink;"></div>
</fieldset></form></td>
<td width="270"><form onSubmit="document.getElementById('scanhostinfo').innerHTML='Please wait...'; disableSubmit();" action="CrawlStartScanner_p.html" method="get">
<fieldset>
<legend><label for="servertable">Scan range with given host name</label></legend><br>
<input type="text" name="scanhost" value="#[host]#" size="28" maxlength="60" />
<input type="submit" id="scanhostSubmit" name="scanhostSubmit" value="Scan" /><br><br><div class="info" id="scanhostinfo" style="text-decoration: blink;"></div>
</fieldset></form></td>
<td><form onSubmit="document.getElementById('scanintranetinfo').innerHTML='Please wait...'; disableSubmit();" action="CrawlStartScanner_p.html" method="get">
<form onSubmit="document.getElementById('scanhostinfo').innerHTML='Please wait...'; disableSubmit();" action="CrawlStartScanner_p.html" method="get">
<fieldset>
<legend><label for="servertable">Full Intranet Scan</label></legend>
#(intranetHint)#::<div class="info">Do not use intranet scan results, you are not in an intranet environment!</div>#(/intranetHint)#
<input type="hidden" name="scanintranet" value=""/>
#[intranethosts]#&nbsp;<input type="submit" id="scanintranetSubmit" name="scanintranetSubmit" value="Scan" /><div class="info" id="scanintranetinfo" style="text-decoration: blink;"></div>
</fieldset></form></td>
</tr></table></div>
#(/enterrange)#
<legend>
<label>Scan the network</label>
</legend>
<dl>
<dt>Scan Range</dt>
<dd>
<input type="radio" name="source" id="source" value="hosts"#(intranet.checked)# checked="checked"::#(/intranet.checked)# />Scan sub-range with given host
<input type="text" name="scanhosts" value="#[scanhosts]#" size="60" maxlength="400" /><br/><br/>
<input type="radio" name="source" id="source" value="intranet"#(intranet.checked)#:: checked="checked"#(/intranet.checked)# />Full Intranet Scan: #[intranethosts]#<br/>
#(intranetHint)#::<div class="info">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Do not use intranet scan results, you are not in an intranet environment!</div>#(/intranetHint)#
</dd>
<dt>Scan Cache</dt>
<dd>
<input type="checkbox" name="accumulatescancache" id="accumulatescancache" checked="checked">accumulate scan results with access type "granted" into scan cache (do not delete old scan result)
</dd>
<dt>Service Type</dt>
<dd>
<input type="checkbox" name="scanftp" checked="checked">ftp
<input type="checkbox" name="scansmb" checked="checked">smb
<input type="checkbox" name="scanhttp">http
<input type="checkbox" name="scanhttps">https
</dd>
<dt>Scheduler</dt>
<dd>
<input type="radio" name="rescan" value="off" #(rescanCheck)#checked="checked"::#(/rescanCheck)#/>run only a scan<br/>
<input type="radio" name="rescan" value="scheduler" onclick="document.getElementById('accumulatescancache').checked = false" #(rescanCheck)#::checked="checked"#(/rescanCheck)#/>
scan and add all sites with granted access automatically. This disables the scan cache accumulation.
<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Look every
<select name="repeat_time">
<option value="1">1</option><option value="2">2</option><option value="3">3</option>
<option value="4">4</option><option value="5">5</option><option value="6">6</option>
<option value="7">7</option><option value="8">8</option>
<option value="9">9</option><option value="10" selected="selected">10</option>
<option value="12">12</option><option value="14">14</option><option value="15">15</option>
<option value="21">21</option><option value="28">28</option><option value="30">30</option>
</select>
<select name="repeat_unit">
<option value="selminutes" selected="selected">minutes</option>
<option value="selhours">hours</option>
<option value="seldays">days</option>
</select> again and add new sites automatically to indexer.<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Sites that do not appear during a scheduled scan period will be excluded from search results.
</dd>
<dt>&nbsp;</dt>
<dd><input type="submit" id="scan" name="scan" value="Scan" /><div class="info" id="scanhostinfo" style="text-decoration: blink;">&nbsp;</div></dd>
</dl>
</fieldset>
</form>
#(servertable)#::
<p>

@ -22,6 +22,7 @@
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.List;
@ -50,120 +51,132 @@ public class CrawlStartScanner_p {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard)env;
prop.put("selectiprange", 0);
prop.put("noserverdetected", 0);
prop.put("enterrange", 0);
prop.put("servertable", 0);
prop.put("enterrange_host", "");
// make a comment cache
Map<byte[], String> apiCommentCache = commentCache(sb);
addSelectIPRange(sb, prop);
addScantable(apiCommentCache, prop);
// case: no query part of the request; ask for input
if (post == null) {
prop.put("selectiprange", 1);
return prop;
prop.put("hosts", "");
prop.put("intranet.checked", sb.isIntranetMode() ? 1 : 0);
// make a scanhosts entry
String hosts = post == null ? "" : post.get("scanhosts", "");
List<InetAddress> ips = Domains.myIntranetIPs();
prop.put("intranethosts", ips.toString());
prop.put("intranetHint", sb.isIntranetMode() ? 0 : 1);
if (hosts.length() == 0) {
InetAddress ip;
if (sb.isIntranetMode()) {
if (ips.size() > 0) ip = ips.get(0); else try {
ip = InetAddress.getByName("192.168.0.1");
} catch (UnknownHostException e) {
ip = null;
e.printStackTrace();
}
} else {
ip = Domains.myPublicLocalIP();
if (Domains.isThisHostIP(ip)) ip = sb.peers.mySeed().getInetAddress();
}
if (ip != null) hosts = ip.getHostAddress();
}
prop.put("scanhosts", hosts);
// case: an IP range was given; scan the range for services and display result
if (post.containsKey("scanip") || post.containsKey("scanhost")) {
InetAddress ia;
try {
if (post.containsKey("scanip")) {
ia = InetAddress.getByAddress(new byte[]{(byte) post.getInt("ip4-0", 0), (byte) post.getInt("ip4-1", 0), (byte) post.getInt("ip4-2", 0), (byte) post.getInt("ip4-3", 0)});
} else {
String host = post.get("scanhost", "");
// parse post requests
if (post != null) {
// case: an IP range was given; scan the range for services and display result
if (post.containsKey("scan") && post.get("source", "").equals("hosts")) {
List<InetAddress> ia = new ArrayList<InetAddress>();
for (String host: hosts.split(",")) try {
if (host.startsWith("http://")) host = host.substring(7);
if (host.startsWith("https://")) host = host.substring(8);
if (host.startsWith("ftp://")) host = host.substring(6);
if (host.startsWith("smb://")) host = host.substring(6);
int p = host.indexOf('/');
if (p >= 0) host = host.substring(0, p);
ia = InetAddress.getByName(host);
prop.put("enterrange_host", host);
}
addSelectIPRange(ia, prop);
Scanner scanner = new Scanner(ia, 100, sb.isIntranetMode() ? 100 : 3000);
scanner.addFTP(false);
scanner.addHTTP(false);
scanner.addHTTPS(false);
scanner.addSMB(false);
ia.add(InetAddress.getByName(host));
} catch (UnknownHostException e) {}
Scanner scanner = new Scanner(ia, 100, sb.isIntranetMode() ? 100 : 1000);
if (post.get("scanftp", "").equals("on")) scanner.addFTP(false);
if (post.get("scanhttp", "").equals("on")) scanner.addHTTP(false);
if (post.get("scanhttps", "").equals("on")) scanner.addHTTPS(false);
if (post.get("scansmb", "").equals("on")) scanner.addSMB(false);
scanner.start();
scanner.terminate();
enlargeScancache(apiCommentCache, scanner);
addScantable(apiCommentCache, prop);
} catch (UnknownHostException e) {}
}
if (post.containsKey("scanintranet")) {
Scanner scanner = new Scanner(Domains.myIntranetIPs(), 100, sb.isIntranetMode() ? 100 : 3000);
scanner.addFTP(false);
scanner.addHTTP(false);
scanner.addHTTPS(false);
scanner.addSMB(false);
scanner.start();
scanner.terminate();
enlargeScancache(apiCommentCache, scanner);
addScantable(apiCommentCache, prop);
}
// check crawl request
if (post != null && post.containsKey("crawl")) {
// make a pk/url mapping
Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
for (MultiProtocolURI u: Scanner.scancache.keySet()) {
DigestURI uu = new DigestURI(u);
pkmap.put(uu.hash(), uu);
if (post.get("accumulatescancache", "").equals("on") && !post.get("rescan", "").equals("scheduler")) enlargeScancache(scanner.services()); else Scanner.scancache = scanner.services();
}
if (post.containsKey("scan") && post.get("source", "").equals("intranet")) {
Scanner scanner = new Scanner(Domains.myIntranetIPs(), 100, sb.isIntranetMode() ? 100 : 3000);
if (post.get("scanftp", "").equals("on")) scanner.addFTP(false);
if (post.get("scanhttp", "").equals("on")) scanner.addHTTP(false);
if (post.get("scanhttps", "").equals("on")) scanner.addHTTPS(false);
if (post.get("scansmb", "").equals("on")) scanner.addSMB(false);
scanner.start();
scanner.terminate();
if (post.get("accumulatescancache", "").equals("on") && !post.get("rescan", "").equals("scheduler")) enlargeScancache(scanner.services()); else Scanner.scancache = scanner.services();
}
// search for crawl start requests in this mapping
for (Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
byte [] pk = entry.getValue().substring(5).getBytes();
DigestURI url = pkmap.get(pk);
if (url != null) {
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
path += "&crawlingURL=" + url.toNormalform(true, false);
WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, pk);
// check crawl request
if (post.containsKey("crawl")) {
// make a pk/url mapping
Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
for (MultiProtocolURI u: Scanner.scancache.keySet()) {
DigestURI uu = new DigestURI(u);
pkmap.put(uu.hash(), uu);
}
// search for crawl start requests in this mapping
for (Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
byte [] pk = entry.getValue().substring(5).getBytes();
DigestURI url = pkmap.get(pk);
if (url != null) {
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
path += "&crawlingURL=" + url.toNormalform(true, false);
WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, pk);
}
}
}
}
}
return prop;
}
private static void addSelectIPRange(Switchboard sb, serverObjects prop) {
InetAddress ip;
List<InetAddress> ips = Domains.myIntranetIPs();
prop.put("enterrange_intranethosts", ips.toString());
prop.put("enterrange_intranetHint", 0);
if (sb.isIntranetMode()) {
if (ips.size() > 0) ip = ips.get(0); else try {
ip = InetAddress.getByName("192.168.0.1");
} catch (UnknownHostException e) {
ip = null;
e.printStackTrace();
// check scheduler
if (post.get("rescan", "").equals("scheduler")) {
int repeat_time = Integer.parseInt(post.get("repeat_time", "-1"));
final String repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays
// store this call as api call
if (repeat_time > 0) {
// store as scheduled api call
sb.tables.recordAPICall(post, "CrawlStartScanner_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "network scanner for hosts: " + hosts, repeat_time, repeat_unit.substring(3));
}
// execute the scan results
if (Scanner.scancache.size() > 0) {
// make a comment cache
Map<byte[], String> apiCommentCache = commentCache(sb);
String urlString;
DigestURI u;
try {
int i = 0;
for (final Map.Entry<MultiProtocolURI, Scanner.Access> host: Scanner.scancache.entrySet()) {
u = new DigestURI(host.getKey());
urlString = u.toNormalform(true, false);
if (host.getValue() == Access.granted && inIndex(apiCommentCache, urlString) == null) {
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
path += "&crawlingURL=" + urlString;
WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, u.hash());
}
i++;
}
} catch (ConcurrentModificationException e) {}
}
}
} else {
prop.put("enterrange_intranetHint", 1);
ip = Domains.myPublicLocalIP();
}
addSelectIPRange(ip, prop);
}
private static void addSelectIPRange(InetAddress ip, serverObjects prop) {
prop.put("enterrange", 1);
byte[] address = ip.getAddress();
prop.put("enterrange_ip4-0", 0xff & address[0]);
prop.put("enterrange_ip4-1", 0xff & address[1]);
prop.put("enterrange_ip4-2", 0xff & address[2]);
}
private static void addScantable(Map<byte[], String> commentCache, serverObjects prop) {
// write scan table
if (Scanner.scancache.size() > 0) {
// make a comment cache
Map<byte[], String> apiCommentCache = commentCache(sb);
// show scancache table
prop.put("servertable", 1);
String urlString;
@ -183,8 +196,8 @@ public class CrawlStartScanner_p {
prop.put("servertable_list_" + i + "_accessEmpty", host.getValue() == Access.empty ? 1 : 0);
prop.put("servertable_list_" + i + "_accessGranted", host.getValue() == Access.granted ? 1 : 0);
prop.put("servertable_list_" + i + "_accessDenied", host.getValue() == Access.denied ? 1 : 0);
prop.put("servertable_list_" + i + "_process", inIndex(commentCache, urlString) == null ? 0 : 1);
prop.put("servertable_list_" + i + "_preselected", interesting(commentCache, u, host.getValue()) ? 1 : 0);
prop.put("servertable_list_" + i + "_process", inIndex(apiCommentCache, urlString) == null ? 0 : 1);
prop.put("servertable_list_" + i + "_preselected", interesting(apiCommentCache, u, host.getValue()) ? 1 : 0);
i++;
}
prop.put("servertable_list", i);
@ -195,20 +208,21 @@ public class CrawlStartScanner_p {
}
}
}
return prop;
}
private static void enlargeScancache(Map<byte[], String> commentCache, Scanner scanner) {
private static void enlargeScancache(Map<MultiProtocolURI, Access> newCache) {
if (Scanner.scancache == null) {
Scanner.scancache = scanner.services();
Scanner.scancache = newCache;
return;
}
Iterator<Map.Entry<MultiProtocolURI, Access>> i = Scanner.scancache.entrySet().iterator();
Map.Entry<MultiProtocolURI, Access> entry;
while (i.hasNext()) {
entry = i.next();
if (!interesting(commentCache, entry.getKey(), entry.getValue())) i.remove();
if (entry.getValue() != Access.granted) i.remove();
}
Scanner.scancache.putAll(scanner.services());
Scanner.scancache.putAll(newCache);
}
private static boolean interesting(Map<byte[], String> commentCache, MultiProtocolURI uri, Access access) {

@ -24,10 +24,12 @@ public class get_bookmarks {
private static int R = 1; // TODO: solve the recursion problem and remove global variable
/*
private final static int SORT_ALPHA = 1;
private final static int SORT_SIZE = 2;
private final static int SHOW_ALL = -1;
*/
private final static int MAXRESULTS = 10000;
// file types and display types

@ -51,7 +51,7 @@ public class genericParser extends AbstractParser implements Parser {
location.getHost(),
null,
null,
"",
location.toTokens(),
null,
null,
null,

Loading…
Cancel
Save