From d1cb4cbc8405cdde02553108ae55b8168e91fb33 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 2 Feb 2013 09:51:43 +0100 Subject: [PATCH] enhanced network scanner, is faster and more flexible now - start more processes - remove superfluous host name resolution - better/more flexible subnet ip range calculation - prefer ipv4 makes better usable ip pre-settings in servlet - extended servlet by new subnet /20 - option - redesign of scanner start process in servlet (generalization) --- htroot/CrawlStartScanner_p.html | 2 +- htroot/CrawlStartScanner_p.java | 99 ++++++++-------------- source/net/yacy/cora/protocol/Domains.java | 2 +- source/net/yacy/cora/protocol/Scanner.java | 41 +++++---- source/net/yacy/yacy.java | 3 + 5 files changed, 62 insertions(+), 85 deletions(-) diff --git a/htroot/CrawlStartScanner_p.html b/htroot/CrawlStartScanner_p.html index d621c84b0..0bf0b21bd 100644 --- a/htroot/CrawlStartScanner_p.html +++ b/htroot/CrawlStartScanner_p.html @@ -54,7 +54,7 @@
Subnet
- /24 /16 + /24 (254 addresses) /20 (4064 addresses) /16 (65024 addresses)
Scan Cache
diff --git a/htroot/CrawlStartScanner_p.java b/htroot/CrawlStartScanner_p.java index 295ded2e4..1482991b0 100644 --- a/htroot/CrawlStartScanner_p.java +++ b/htroot/CrawlStartScanner_p.java @@ -23,6 +23,7 @@ import java.net.MalformedURLException; import java.util.ConcurrentModificationException; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; @@ -44,7 +45,7 @@ import net.yacy.server.serverSwitch; public class CrawlStartScanner_p { - private final static int CONCURRENT_RUNNER = 100; + private final static int CONCURRENT_RUNNER = 200; public static serverObjects respond( @SuppressWarnings("unused") final RequestHeader header, @@ -100,71 +101,38 @@ public class CrawlStartScanner_p repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays } - final boolean bigrange = post.get("subnet", "24").equals("16"); - - // case: an IP range was given; scan the range for services and display result - if ( post.containsKey("scan") && "hosts".equals(post.get("source", "")) ) { - final Set ia = new HashSet(); - for ( String host : hosts.split(",") ) { - if ( host.startsWith("http://") ) { - host = host.substring(7); - } - if ( host.startsWith("https://") ) { - host = host.substring(8); - } - if ( host.startsWith("ftp://") ) { - host = host.substring(6); + final int subnet = post.getInt("subnet", 24); + + // scan a range of ips + if (post.containsKey("scan")) { + final Set scanbase = new HashSet(); + + // select host base to scan + if ("hosts".equals(post.get("source", ""))) { + for (String host: hosts.split(",")) { + if (host.startsWith("http://")) host = host.substring(7); + if (host.startsWith("https://")) host = host.substring(8); + if (host.startsWith("ftp://")) host = host.substring(6); + if (host.startsWith("smb://")) host = host.substring(6); + final int p = host.indexOf('/', 0); + if (p >= 0) host = host.substring(0, p); + if (host.length() > 0) 
scanbase.add(Domains.dnsResolve(host)); } - if ( host.startsWith("smb://") ) { - host = host.substring(6); - } - final int p = host.indexOf('/', 0); - if ( p >= 0 ) { - host = host.substring(0, p); - } - ia.add(Domains.dnsResolve(host)); - } - final Scanner scanner = new Scanner(ia, CONCURRENT_RUNNER, timeout); - if ( post.get("scanftp", "").equals("on") ) { - scanner.addFTP(bigrange); - } - if ( post.get("scanhttp", "").equals("on") ) { - scanner.addHTTP(bigrange); - } - if ( post.get("scanhttps", "").equals("on") ) { - scanner.addHTTPS(bigrange); - } - if ( post.get("scansmb", "").equals("on") ) { - scanner.addSMB(bigrange); - } - scanner.start(); - scanner.terminate(); - if ( "on".equals(post.get("accumulatescancache", "")) - && !"scheduler".equals(post.get("rescan", "")) ) { - Scanner.scancacheExtend(scanner); - } else { - Scanner.scancacheReplace(scanner); - } - } - - if ( post.containsKey("scan") && "intranet".equals(post.get("source", "")) ) { - final Scanner scanner = new Scanner(Domains.myIntranetIPs(), CONCURRENT_RUNNER, timeout); - if ( "on".equals(post.get("scanftp", "")) ) { - scanner.addFTP(bigrange); - } - if ( "on".equals(post.get("scanhttp", "")) ) { - scanner.addHTTP(bigrange); - } - if ( "on".equals(post.get("scanhttps", "")) ) { - scanner.addHTTPS(bigrange); - } - if ( "on".equals(post.get("scansmb", "")) ) { - scanner.addSMB(bigrange); } + if ("intranet".equals(post.get("source", ""))) { + scanbase.addAll(Domains.myIntranetIPs()); + } + + // start a scanner + final Scanner scanner = new Scanner(scanbase, CONCURRENT_RUNNER, timeout); + List addresses = scanner.genlist(subnet); + if ("on".equals(post.get("scanftp", ""))) scanner.addFTP(addresses); + if ("on".equals(post.get("scanhttp", ""))) scanner.addHTTP(addresses); + if ("on".equals(post.get("scanhttps", ""))) scanner.addHTTPS(addresses); + if ("on".equals(post.get("scansmb", ""))) scanner.addSMB(addresses); scanner.start(); scanner.terminate(); - if ( 
"on".equals(post.get("accumulatescancache", "")) - && !"scheduler".equals(post.get("rescan", "")) ) { + if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) { Scanner.scancacheExtend(scanner); } else { Scanner.scancacheReplace(scanner); @@ -177,7 +145,7 @@ public class CrawlStartScanner_p final Iterator> se = Scanner.scancacheEntries(); final Map pkmap = new TreeMap(Base64Order.enhancedCoder); - while ( se.hasNext() ) { + while (se.hasNext()) { final Scanner.Service u = se.next().getKey(); DigestURI uu; try { @@ -193,8 +161,7 @@ public class CrawlStartScanner_p final byte[] pk = entry.getValue().substring(5).getBytes(); final DigestURI url = pkmap.get(pk); if ( url != null ) { - String path = - "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off"; + String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off"; path += "&crawlingURL=" + url.toNormalform(true); WorkTables.execAPICall( Domains.LOCALHOST, @@ -263,5 +230,5 @@ public class CrawlStartScanner_p return prop; } - + } diff --git a/source/net/yacy/cora/protocol/Domains.java b/source/net/yacy/cora/protocol/Domains.java index ca277c12a..ba098d95a 100644 --- a/source/net/yacy/cora/protocol/Domains.java +++ 
b/source/net/yacy/cora/protocol/Domains.java @@ -993,7 +993,7 @@ public class Domains { final Set list = new HashSet(); if (localHostAddresses.isEmpty()) return list; // give up for (final InetAddress a: localHostAddresses) { - if ((0Xff & a.getAddress()[0]) == 127 || LOCAL_PATTERNS.matcher(a.getHostAddress()).matches()) continue; + if ((0Xff & a.getAddress()[0]) == 127) continue; list.add(a); } return list; diff --git a/source/net/yacy/cora/protocol/Scanner.java b/source/net/yacy/cora/protocol/Scanner.java index 9af4cfb3b..876fac440 100644 --- a/source/net/yacy/cora/protocol/Scanner.java +++ b/source/net/yacy/cora/protocol/Scanner.java @@ -212,7 +212,7 @@ public class Scanner extends Thread { Service uri; try { while ((uri = this.scanqueue.take()) != POISONSERVICE) { - Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getHostName()); // good for debugging + Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getInetAddress()); // good for debugging while (this.runner.size() >= this.runnerCount) { /*for (Runner r: runner.keySet()) { if (r.age() > 3000) synchronized(r) { r.interrupt(); } @@ -301,24 +301,24 @@ public class Scanner extends Thread { } } - public void addHTTP(final boolean bigrange) { - addProtocol(Protocol.http, bigrange); + public void addHTTP(final List addresses) { + addProtocol(Protocol.http, addresses); } - public void addHTTPS(final boolean bigrange) { - addProtocol(Protocol.https, bigrange); + public void addHTTPS(final List addresses) { + addProtocol(Protocol.https, addresses); } - public void addSMB(final boolean bigrange) { - addProtocol(Protocol.smb, bigrange); + public void addSMB(final List addresses) { + addProtocol(Protocol.smb, addresses); } - public void addFTP(final boolean bigrange) { - addProtocol(Protocol.ftp, bigrange); + public void addFTP(final List addresses) { + addProtocol(Protocol.ftp, addresses); } - private void addProtocol(final Protocol protocol, final boolean bigrange) { - for (final 
InetAddress i: genlist(bigrange)) { + private void addProtocol(final Protocol protocol, final List addresses) { + for (final InetAddress i: addresses) { try { this.scanqueue.put(new Service(protocol, i)); } catch (final InterruptedException e) { @@ -326,10 +326,16 @@ public class Scanner extends Thread { } } - private final List genlist(final boolean bigrange) { + /** + * generate a list of internetaddresses + * @param subnet the subnet: 24 will generate 254 addresses, 16 will generate 256 * 254; must be >= 16 and <= 24 + * @return + */ + public final List genlist(final int subnet) { final ArrayList c = new ArrayList(10); for (final InetAddress i: this.scanrange) { - for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) { + int ul = subnet >= 24 ? i.getAddress()[2] : (1 << (24 - subnet)) - 1; + for (int br = subnet >= 24 ? i.getAddress()[2] : 0; br <= ul; br++) { for (int j = 1; j < 255; j++) { final byte[] address = i.getAddress(); address[2] = (byte) br; @@ -358,10 +364,11 @@ public class Scanner extends Thread { public static void main(final String[] args) { //try {System.out.println("192.168.1.91: " + ping(new MultiProtocolURI("smb://192.168.1.91/"), 1000));} catch (MalformedURLException e) {} final Scanner scanner = new Scanner(100, 10); - scanner.addFTP(false); - scanner.addHTTP(false); - scanner.addHTTPS(false); - scanner.addSMB(false); + List addresses = scanner.genlist(20); + scanner.addFTP(addresses); + scanner.addHTTP(addresses); + scanner.addHTTPS(addresses); + scanner.addSMB(addresses); scanner.start(); scanner.terminate(); for (final Service service: scanner.services().keySet()) { diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 24ad1c2dc..1cd4c85cc 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -40,6 +40,8 @@ import java.nio.channels.FileLock; import java.util.Properties; import java.util.concurrent.Semaphore; +import 
sun.security.action.GetBooleanAction; + import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.protocol.ClientIdentification; @@ -597,6 +599,7 @@ public final class yacy { if (OS.isWindows) headless = false; if (args.length >= 1 && args[0].toLowerCase().equals("-gui")) headless = false; System.setProperty("java.awt.headless", headless ? "true" : "false"); + System.setProperty("java.net.preferIPv4Stack", "true"); String s = ""; for (final String a: args) s += a + " "; yacyRelease.startParameter = s.trim();