Enhanced the network scanner: it is now faster and more flexible

- start more processes
- remove superfluous host name resolution
- better/more flexible subnet ip range calculation
- preferring IPv4 gives more usable IP pre-settings in the servlet
- extended the servlet with a new /20 subnet option
- redesign of scanner start process in servlet (generalization)
pull/1/head
Michael Peter Christen 12 years ago
parent 592adf7ccb
commit d1cb4cbc84

@ -54,7 +54,7 @@
</dd>
<dt>Subnet</dt>
<dd>
<input type="radio" name="subnet" value="24" checked="checked"/>/24 <input type="radio" name="subnet" value="16"/>/16
<input type="radio" name="subnet" value="24" checked="checked"/>/24 (254 addresses) <input type="radio" name="subnet" value="20"/>/20 (4064 addresses) <input type="radio" name="subnet" value="16"/>/16 (65024 addresses)
</dd>
<dt>Scan Cache</dt>
<dd>

@ -23,6 +23,7 @@ import java.net.MalformedURLException;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
@ -44,7 +45,7 @@ import net.yacy.server.serverSwitch;
public class CrawlStartScanner_p
{
private final static int CONCURRENT_RUNNER = 100;
private final static int CONCURRENT_RUNNER = 200;
public static serverObjects respond(
@SuppressWarnings("unused") final RequestHeader header,
@ -100,71 +101,38 @@ public class CrawlStartScanner_p
repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays
}
final boolean bigrange = post.get("subnet", "24").equals("16");
// case: an IP range was given; scan the range for services and display result
if ( post.containsKey("scan") && "hosts".equals(post.get("source", "")) ) {
final Set<InetAddress> ia = new HashSet<InetAddress>();
for ( String host : hosts.split(",") ) {
if ( host.startsWith("http://") ) {
host = host.substring(7);
}
if ( host.startsWith("https://") ) {
host = host.substring(8);
}
if ( host.startsWith("ftp://") ) {
host = host.substring(6);
final int subnet = post.getInt("subnet", 24);
// scan a range of ips
if (post.containsKey("scan")) {
final Set<InetAddress> scanbase = new HashSet<InetAddress>();
// select host base to scan
if ("hosts".equals(post.get("source", ""))) {
for (String host: hosts.split(",")) {
if (host.startsWith("http://")) host = host.substring(7);
if (host.startsWith("https://")) host = host.substring(8);
if (host.startsWith("ftp://")) host = host.substring(6);
if (host.startsWith("smb://")) host = host.substring(6);
final int p = host.indexOf('/', 0);
if (p >= 0) host = host.substring(0, p);
if (host.length() > 0) scanbase.add(Domains.dnsResolve(host));
}
if ( host.startsWith("smb://") ) {
host = host.substring(6);
}
final int p = host.indexOf('/', 0);
if ( p >= 0 ) {
host = host.substring(0, p);
}
ia.add(Domains.dnsResolve(host));
}
final Scanner scanner = new Scanner(ia, CONCURRENT_RUNNER, timeout);
if ( post.get("scanftp", "").equals("on") ) {
scanner.addFTP(bigrange);
}
if ( post.get("scanhttp", "").equals("on") ) {
scanner.addHTTP(bigrange);
}
if ( post.get("scanhttps", "").equals("on") ) {
scanner.addHTTPS(bigrange);
}
if ( post.get("scansmb", "").equals("on") ) {
scanner.addSMB(bigrange);
}
scanner.start();
scanner.terminate();
if ( "on".equals(post.get("accumulatescancache", ""))
&& !"scheduler".equals(post.get("rescan", "")) ) {
Scanner.scancacheExtend(scanner);
} else {
Scanner.scancacheReplace(scanner);
}
}
if ( post.containsKey("scan") && "intranet".equals(post.get("source", "")) ) {
final Scanner scanner = new Scanner(Domains.myIntranetIPs(), CONCURRENT_RUNNER, timeout);
if ( "on".equals(post.get("scanftp", "")) ) {
scanner.addFTP(bigrange);
}
if ( "on".equals(post.get("scanhttp", "")) ) {
scanner.addHTTP(bigrange);
}
if ( "on".equals(post.get("scanhttps", "")) ) {
scanner.addHTTPS(bigrange);
}
if ( "on".equals(post.get("scansmb", "")) ) {
scanner.addSMB(bigrange);
}
if ("intranet".equals(post.get("source", ""))) {
scanbase.addAll(Domains.myIntranetIPs());
}
// start a scanner
final Scanner scanner = new Scanner(scanbase, CONCURRENT_RUNNER, timeout);
List<InetAddress> addresses = scanner.genlist(subnet);
if ("on".equals(post.get("scanftp", ""))) scanner.addFTP(addresses);
if ("on".equals(post.get("scanhttp", ""))) scanner.addHTTP(addresses);
if ("on".equals(post.get("scanhttps", ""))) scanner.addHTTPS(addresses);
if ("on".equals(post.get("scansmb", ""))) scanner.addSMB(addresses);
scanner.start();
scanner.terminate();
if ( "on".equals(post.get("accumulatescancache", ""))
&& !"scheduler".equals(post.get("rescan", "")) ) {
if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
Scanner.scancacheExtend(scanner);
} else {
Scanner.scancacheReplace(scanner);
@ -177,7 +145,7 @@ public class CrawlStartScanner_p
final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se = Scanner.scancacheEntries();
final Map<byte[], DigestURI> pkmap =
new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
while ( se.hasNext() ) {
while (se.hasNext()) {
final Scanner.Service u = se.next().getKey();
DigestURI uu;
try {
@ -193,8 +161,7 @@ public class CrawlStartScanner_p
final byte[] pk = entry.getValue().substring(5).getBytes();
final DigestURI url = pkmap.get(pk);
if ( url != null ) {
String path =
"/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
path += "&crawlingURL=" + url.toNormalform(true);
WorkTables.execAPICall(
Domains.LOCALHOST,
@ -263,5 +230,5 @@ public class CrawlStartScanner_p
return prop;
}
}

@ -993,7 +993,7 @@ public class Domains {
final Set<InetAddress> list = new HashSet<InetAddress>();
if (localHostAddresses.isEmpty()) return list; // give up
for (final InetAddress a: localHostAddresses) {
if ((0Xff & a.getAddress()[0]) == 127 || LOCAL_PATTERNS.matcher(a.getHostAddress()).matches()) continue;
if ((0Xff & a.getAddress()[0]) == 127) continue;
list.add(a);
}
return list;

@ -212,7 +212,7 @@ public class Scanner extends Thread {
Service uri;
try {
while ((uri = this.scanqueue.take()) != POISONSERVICE) {
Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getHostName()); // good for debugging
Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getInetAddress()); // good for debugging
while (this.runner.size() >= this.runnerCount) {
/*for (Runner r: runner.keySet()) {
if (r.age() > 3000) synchronized(r) { r.interrupt(); }
@ -301,24 +301,24 @@ public class Scanner extends Thread {
}
}
public void addHTTP(final boolean bigrange) {
addProtocol(Protocol.http, bigrange);
public void addHTTP(final List<InetAddress> addresses) {
addProtocol(Protocol.http, addresses);
}
public void addHTTPS(final boolean bigrange) {
addProtocol(Protocol.https, bigrange);
public void addHTTPS(final List<InetAddress> addresses) {
addProtocol(Protocol.https, addresses);
}
public void addSMB(final boolean bigrange) {
addProtocol(Protocol.smb, bigrange);
public void addSMB(final List<InetAddress> addresses) {
addProtocol(Protocol.smb, addresses);
}
public void addFTP(final boolean bigrange) {
addProtocol(Protocol.ftp, bigrange);
public void addFTP(final List<InetAddress> addresses) {
addProtocol(Protocol.ftp, addresses);
}
private void addProtocol(final Protocol protocol, final boolean bigrange) {
for (final InetAddress i: genlist(bigrange)) {
private void addProtocol(final Protocol protocol, final List<InetAddress> addresses) {
for (final InetAddress i: addresses) {
try {
this.scanqueue.put(new Service(protocol, i));
} catch (final InterruptedException e) {
@ -326,10 +326,16 @@ public class Scanner extends Thread {
}
}
private final List<InetAddress> genlist(final boolean bigrange) {
/**
* generate a list of internet addresses
* @param subnet the subnet: 24 will generate 254 addresses, 16 will generate 256 * 254; must be >= 16 and <= 24
* @return the list of generated addresses
*/
public final List<InetAddress> genlist(final int subnet) {
final ArrayList<InetAddress> c = new ArrayList<InetAddress>(10);
for (final InetAddress i: this.scanrange) {
for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) {
int ul = subnet >= 24 ? i.getAddress()[2] : (1 << (24 - subnet)) - 1;
for (int br = subnet >= 24 ? i.getAddress()[2] : 0; br <= ul; br++) {
for (int j = 1; j < 255; j++) {
final byte[] address = i.getAddress();
address[2] = (byte) br;
@ -358,10 +364,11 @@ public class Scanner extends Thread {
public static void main(final String[] args) {
//try {System.out.println("192.168.1.91: " + ping(new MultiProtocolURI("smb://192.168.1.91/"), 1000));} catch (MalformedURLException e) {}
final Scanner scanner = new Scanner(100, 10);
scanner.addFTP(false);
scanner.addHTTP(false);
scanner.addHTTPS(false);
scanner.addSMB(false);
List<InetAddress> addresses = scanner.genlist(20);
scanner.addFTP(addresses);
scanner.addHTTP(addresses);
scanner.addHTTPS(addresses);
scanner.addSMB(addresses);
scanner.start();
scanner.terminate();
for (final Service service: scanner.services().keySet()) {

@ -40,6 +40,8 @@ import java.nio.channels.FileLock;
import java.util.Properties;
import java.util.concurrent.Semaphore;
import sun.security.action.GetBooleanAction;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.protocol.ClientIdentification;
@ -597,6 +599,7 @@ public final class yacy {
if (OS.isWindows) headless = false;
if (args.length >= 1 && args[0].toLowerCase().equals("-gui")) headless = false;
System.setProperty("java.awt.headless", headless ? "true" : "false");
System.setProperty("java.net.preferIPv4Stack", "true");
String s = ""; for (final String a: args) s += a + " ";
yacyRelease.startParameter = s.trim();

Loading…
Cancel
Save