From acab6801d9b87bff321a656173b0695b3d034ef6 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 13 Dec 2010 18:19:37 +0000 Subject: [PATCH] added new network scanner - you can scan any ip or host in the internet for services - this replaces the intranet scanner git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7371 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/CrawlStartIntranet_p.java | 8 +- htroot/CrawlStartScanner_p.html | 90 +++++++++ htroot/CrawlStartScanner_p.java | 179 ++++++++++++++++++ .../env/templates/submenuIndexCreate.template | 2 +- source/de/anomic/search/Switchboard.java | 1 - source/net/yacy/cora/protocol/Scanner.java | 119 +++++++----- 6 files changed, 342 insertions(+), 57 deletions(-) create mode 100644 htroot/CrawlStartScanner_p.html create mode 100644 htroot/CrawlStartScanner_p.java diff --git a/htroot/CrawlStartIntranet_p.java b/htroot/CrawlStartIntranet_p.java index c89bd8b84..035763fd6 100644 --- a/htroot/CrawlStartIntranet_p.java +++ b/htroot/CrawlStartIntranet_p.java @@ -53,7 +53,7 @@ public class CrawlStartIntranet_p { } // if there are no intranet addresses known, scan the net - if (sb.intranetURLs.size() == 0) { + if (Scanner.intranetURLs.size() == 0) { Scanner scanner = new Scanner(100, 10); scanner.addFTP(false); scanner.addHTTP(false); @@ -64,7 +64,7 @@ public class CrawlStartIntranet_p { DigestURI url; for (MultiProtocolURI service: scanner.services()) { url = new DigestURI(service); - sb.intranetURLs.put(url.hash(), url); + Scanner.intranetURLs.put(url.hash(), url); } } @@ -73,7 +73,7 @@ public class CrawlStartIntranet_p { for (Map.Entry entry: post.entrySet()) { if (entry.getValue().startsWith("mark_")) { byte [] pk = entry.getValue().substring(5).getBytes(); - DigestURI url = sb.intranetURLs.get(pk); + DigestURI url = Scanner.intranetURLs.get(pk); if (url != null) { String path = 
"/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99"; path += "&crawlingURL=" + url.toNormalform(true, false); @@ -87,7 +87,7 @@ public class CrawlStartIntranet_p { prop.put("servertable", 1); int i = 0; String urlString; - for (final DigestURI url: sb.intranetURLs.values()) { + for (final DigestURI url: Scanner.intranetURLs.values()) { urlString = url.toNormalform(true, false); prop.put("servertable_list_" + i + "_pk", new String(url.hash())); prop.put("servertable_list_" + i + "_count", i); diff --git a/htroot/CrawlStartScanner_p.html b/htroot/CrawlStartScanner_p.html new file mode 100644 index 000000000..c9c5b4189 --- /dev/null +++ b/htroot/CrawlStartScanner_p.html @@ -0,0 +1,90 @@ + + + + YaCy '#[clientname]#': Network Scanner + #%env/templates/metas.template%# + + + + + #%env/templates/header.template%# + #%env/templates/submenuIndexCreate.template%# +

Network Scanner

+ + #(selectiprange)#:: +

+ YaCy can scan a network segment for available http, ftp and smb servers. + You must first select an IP range and then, after this range has been scanned, + it is possible to select servers that have been found for a full-site crawl. +

+ #(/selectiprange)# + + #(noserverdetected)#:: +

+ No servers were detected in the given IP range #[iprange]#. Please enter a different IP range for another scan. +

+ #(/noserverdetected)# + + #(enterrange)#:: +

+

+ + . + . + .[1-254] + +
+
+ + + +
+

+ #(/enterrange)# + + #(servertable)#:: +

+ The following servers were detected: +

+
+ + + + + + + + + + #{list}# + + + + + + #(process)#::#(/process)# + + #{/list}# +
ProtocolIPURLProcess
#[protocol]##[ip]##[url]#not in indexindexed
+

+ + +

+
+ #(/servertable)# + + + #%env/templates/footer.template%# + + diff --git a/htroot/CrawlStartScanner_p.java b/htroot/CrawlStartScanner_p.java new file mode 100644 index 000000000..083f5b781 --- /dev/null +++ b/htroot/CrawlStartScanner_p.java @@ -0,0 +1,179 @@ +/** + * CrawlStartScanner_p + * Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 12.12.2010 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */ + + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.Domains; +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.cora.protocol.Scanner; +import net.yacy.kelondro.blob.Tables; +import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.Base64Order; + +import de.anomic.data.WorkTables; +import de.anomic.search.Switchboard; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; + +public class CrawlStartScanner_p { + + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { + + final serverObjects prop = new serverObjects(); + final Switchboard sb = (Switchboard)env; + + prop.put("selectiprange", 0); + prop.put("noserverdetected", 0); + prop.put("enterrange", 0); + prop.put("servertable", 0); + + addSelectIPRange(sb, prop); + addScantable(sb, prop); + + // case: no query part of the request; ask for input + if (post == null) { + prop.put("selectiprange", 1); + return prop; + } + + // case: an IP range was given; scan the range for services and display result + if (post.containsKey("scanip") || post.containsKey("scanhost")) { + addSelectIPRange(sb, prop); + InetAddress ia; + try { + if (post.containsKey("scanip")) { + ia = InetAddress.getByAddress(new byte[]{(byte) post.getInt("ip4-0", 0), (byte) post.getInt("ip4-1", 0), (byte) post.getInt("ip4-2", 0), (byte) post.getInt("ip4-3", 0)}); + } else { + ia = InetAddress.getByName(post.get("host", "")); + } + addSelectIPRange(ia, prop); + Scanner scanner = new Scanner(ia, 100, sb.isIntranetMode() ? 
100 : 3000); + scanner.addFTP(false); + scanner.addHTTP(false); + scanner.addHTTPS(false); + scanner.addSMB(false); + scanner.start(); + scanner.terminate(); + Scanner.scancache = scanner.services(); + addScantable(sb, prop); + } catch (UnknownHostException e) {} + } + + // check crawl request + if (post != null && post.containsKey("crawl")) { + // make a pk/url mapping + Map pkmap = new TreeMap(Base64Order.enhancedCoder); + for (MultiProtocolURI u: Scanner.scancache) { + DigestURI uu = new DigestURI(u); + pkmap.put(uu.hash(), uu); + } + // search for crawl start requests in this mapping + for (Map.Entry entry: post.entrySet()) { + if (entry.getValue().startsWith("mark_")) { + byte [] pk = entry.getValue().substring(5).getBytes(); + DigestURI url = pkmap.get(pk); + if (url != null) { + String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99"; + path += "&crawlingURL=" + url.toNormalform(true, false); + WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, pk); + } + } + } + } + + return prop; + } + + private static void addSelectIPRange(Switchboard sb, serverObjects prop) { + InetAddress ip; + if (sb.isIntranetMode()) { + List ips = Domains.myIntranetIPs(); + if (ips.size() > 0) ip = ips.get(0); else try { + ip = InetAddress.getByName("192.168.0.1"); + } catch (UnknownHostException e) { + ip = null; + e.printStackTrace(); + } + } else { + ip = Domains.myPublicLocalIP(); + } + addSelectIPRange(ip, prop); + } + + private static void addSelectIPRange(InetAddress ip, serverObjects prop) { + prop.put("enterrange", 1); + byte[] address = ip.getAddress(); + 
prop.put("enterrange_host", ""); + prop.put("enterrange_ip4-0", 0xff & address[0]); + prop.put("enterrange_ip4-1", 0xff & address[1]); + prop.put("enterrange_ip4-2", 0xff & address[2]); + } + + private static void addScantable(Switchboard sb, serverObjects prop) { + if (Scanner.scancache.size() > 0) { + // show scancache table + prop.put("servertable", 1); + int i = 0; + String urlString; + DigestURI u; + for (final MultiProtocolURI url: Scanner.scancache) { + u = new DigestURI(url); + urlString = u.toNormalform(true, false); + prop.put("servertable_list_" + i + "_pk", new String(u.hash())); + prop.put("servertable_list_" + i + "_count", i); + prop.putHTML("servertable_list_" + i + "_protocol", u.getProtocol()); + prop.putHTML("servertable_list_" + i + "_ip", Domains.dnsResolve(u.getHost()).getHostAddress()); + prop.putHTML("servertable_list_" + i + "_url", urlString); + prop.put("servertable_list_" + i + "_process", inIndex(sb, urlString) == null ? 0 : 1); + i++; + } + prop.put("servertable_list", i); + prop.put("servertable_num", i); + } + } + + private static byte[] inIndex(Switchboard sb, String url) { + Iterator i; + try { + i = sb.tables.iterator(WorkTables.TABLE_API_NAME); + Tables.Row row; + String comment; + while (i.hasNext()) { + row = i.next(); + comment = new String(row.get(WorkTables.TABLE_API_COL_COMMENT)); + if (comment.contains(url)) return row.getPK(); + } + return null; + } catch (IOException e) { + Log.logException(e); + return null; + } + } + +} diff --git a/htroot/env/templates/submenuIndexCreate.template b/htroot/env/templates/submenuIndexCreate.template index 3fb9d82e2..f5daf42fe 100644 --- a/htroot/env/templates/submenuIndexCreate.template +++ b/htroot/env/templates/submenuIndexCreate.template @@ -7,7 +7,7 @@ diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 411424fd7..18ffe3a36 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -230,7 
+230,6 @@ public final class Switchboard extends serverSwitch { public LinkedBlockingQueue trail; public yacySeedDB peers; public WorkTables tables; - public SortedMap intranetURLs = new TreeMap(Base64Order.enhancedCoder); public WorkflowProcessor indexingDocumentProcessor; public WorkflowProcessor indexingCondensementProcessor; diff --git a/source/net/yacy/cora/protocol/Scanner.java b/source/net/yacy/cora/protocol/Scanner.java index 27e9d6030..5eef48f7e 100644 --- a/source/net/yacy/cora/protocol/Scanner.java +++ b/source/net/yacy/cora/protocol/Scanner.java @@ -36,7 +36,9 @@ import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.protocol.http.HTTPClient; +import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.Base64Order; /** * a protocol scanner @@ -47,21 +49,38 @@ public class Scanner extends Thread { private static final MultiProtocolURI POISONURI = new MultiProtocolURI(); private static final Object PRESENT = new Object(); + public static Map intranetURLs = new TreeMap(Base64Order.enhancedCoder); // deprecated + public static Collection scancache = new ArrayList(1); + private int runnerCount; - private List a; + private List scanrange; private BlockingQueue scanqueue; private Map services; private Map runner; private int timeout; - - public Scanner(int concurrentRunner, int timeout) { + + public Scanner(InetAddress scanrange, int concurrentRunner, int timeout) { this.runnerCount = concurrentRunner; - this.a = Domains.myIntranetIPs(); + this.scanrange = new ArrayList(); + this.scanrange.add(scanrange); this.scanqueue = new LinkedBlockingQueue(); this.services = Collections.synchronizedMap(new TreeMap()); this.runner = new ConcurrentHashMap(); this.timeout = timeout; } + + public Scanner(List scanrange, int concurrentRunner, int timeout) { + this.runnerCount = concurrentRunner; + this.scanrange = scanrange; + this.scanqueue = new 
LinkedBlockingQueue(); + this.services = Collections.synchronizedMap(new TreeMap()); + this.runner = new ConcurrentHashMap(); + this.timeout = timeout; + } + + public Scanner(int concurrentRunner, int timeout) { + this(Domains.myIntranetIPs(), concurrentRunner, timeout); + } public void run() { MultiProtocolURI uri; @@ -81,53 +100,6 @@ public class Scanner extends Thread { Log.logException(e); } } - - private final List genlist(boolean bigrange) { - ArrayList c = new ArrayList(10); - for (InetAddress i: a) { - for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) { - for (int j = 1; j < 255; j++) { - byte[] address = i.getAddress(); - address[2] = (byte) br; - address[3] = (byte) j; - try { - c.add(InetAddress.getByAddress(address)); - } catch (UnknownHostException e) { - } - } - } - } - return c; - } - - public void addHTTP(boolean bigrange) { - addProtocol("http", bigrange); - } - - public void addHTTPS(boolean bigrange) { - addProtocol("https", bigrange); - } - - public void addSMB(boolean bigrange) { - addProtocol("smb", bigrange); - } - - public void addFTP(boolean bigrange) { - addProtocol("ftp", bigrange); - } - - private void addProtocol(String protocol, boolean bigrange) { - for (InetAddress i: genlist(bigrange)) { - try { - - this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/")); - } catch (MalformedURLException e) { - Log.logException(e); - } catch (InterruptedException e) { - Log.logException(e); - } - } - } public int pending() { return this.scanqueue.size(); @@ -178,6 +150,51 @@ public class Scanner extends Thread { } } + public void addHTTP(boolean bigrange) { + addProtocol("http", bigrange); + } + + public void addHTTPS(boolean bigrange) { + addProtocol("https", bigrange); + } + + public void addSMB(boolean bigrange) { + addProtocol("smb", bigrange); + } + + public void addFTP(boolean bigrange) { + addProtocol("ftp", bigrange); + } + + private void addProtocol(String 
protocol, boolean bigrange) { + for (InetAddress i: genlist(bigrange)) { + try { + this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/")); + } catch (MalformedURLException e) { + Log.logException(e); + } catch (InterruptedException e) { + Log.logException(e); + } + } + } + + private final List genlist(boolean bigrange) { + ArrayList c = new ArrayList(10); + for (InetAddress i: scanrange) { + for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) { + for (int j = 1; j < 255; j++) { + byte[] address = i.getAddress(); + address[2] = (byte) br; + address[3] = (byte) j; + try { + c.add(InetAddress.getByAddress(address)); + } catch (UnknownHostException e) { + } + } + } + } + return c; + } public Collection services() { return this.services.keySet();