From acab6801d9b87bff321a656173b0695b3d034ef6 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Mon, 13 Dec 2010 18:19:37 +0000
Subject: [PATCH] added new network scanner - you can scan any ip or host in
the internet for services - this replaces the intranet scanner
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7371 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/CrawlStartIntranet_p.java | 8 +-
htroot/CrawlStartScanner_p.html | 90 +++++++++
htroot/CrawlStartScanner_p.java | 179 ++++++++++++++++++
.../env/templates/submenuIndexCreate.template | 2 +-
source/de/anomic/search/Switchboard.java | 1 -
source/net/yacy/cora/protocol/Scanner.java | 119 +++++++-----
6 files changed, 342 insertions(+), 57 deletions(-)
create mode 100644 htroot/CrawlStartScanner_p.html
create mode 100644 htroot/CrawlStartScanner_p.java
diff --git a/htroot/CrawlStartIntranet_p.java b/htroot/CrawlStartIntranet_p.java
index c89bd8b84..035763fd6 100644
--- a/htroot/CrawlStartIntranet_p.java
+++ b/htroot/CrawlStartIntranet_p.java
@@ -53,7 +53,7 @@ public class CrawlStartIntranet_p {
}
// if there are no intranet addresses known, scan the net
- if (sb.intranetURLs.size() == 0) {
+ if (Scanner.intranetURLs.size() == 0) {
Scanner scanner = new Scanner(100, 10);
scanner.addFTP(false);
scanner.addHTTP(false);
@@ -64,7 +64,7 @@ public class CrawlStartIntranet_p {
DigestURI url;
for (MultiProtocolURI service: scanner.services()) {
url = new DigestURI(service);
- sb.intranetURLs.put(url.hash(), url);
+ Scanner.intranetURLs.put(url.hash(), url);
}
}
@@ -73,7 +73,7 @@ public class CrawlStartIntranet_p {
for (Map.Entry entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
byte [] pk = entry.getValue().substring(5).getBytes();
- DigestURI url = sb.intranetURLs.get(pk);
+ DigestURI url = Scanner.intranetURLs.get(pk);
if (url != null) {
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
path += "&crawlingURL=" + url.toNormalform(true, false);
@@ -87,7 +87,7 @@ public class CrawlStartIntranet_p {
prop.put("servertable", 1);
int i = 0;
String urlString;
- for (final DigestURI url: sb.intranetURLs.values()) {
+ for (final DigestURI url: Scanner.intranetURLs.values()) {
urlString = url.toNormalform(true, false);
prop.put("servertable_list_" + i + "_pk", new String(url.hash()));
prop.put("servertable_list_" + i + "_count", i);
diff --git a/htroot/CrawlStartScanner_p.html b/htroot/CrawlStartScanner_p.html
new file mode 100644
index 000000000..c9c5b4189
--- /dev/null
+++ b/htroot/CrawlStartScanner_p.html
@@ -0,0 +1,90 @@
+
+
+
+ YaCy '#[clientname]#': Network Scanner
+ #%env/templates/metas.template%#
+
+
+
+
+ #%env/templates/header.template%#
+ #%env/templates/submenuIndexCreate.template%#
+ Network Scanner
+
+ #(selectiprange)#::
+
+ YaCy can scan a network segment for available http, ftp and smb servers.
+ You must first select an IP range and then, after this range has been scanned,
+ you can select servers that were found for a full-site crawl.
+
+ #(/selectiprange)#
+
+ #(noserverdetected)#::
+
+ No servers had been detected in the given IP range #[iprange]#. Please enter a different IP range for another scan.
+
+ #(/noserverdetected)#
+
+ #(enterrange)#::
+
+
+
+
+ #(/enterrange)#
+
+ #(servertable)#::
+
+ The following servers had been detected:
+
+
+ #(/servertable)#
+
+
+ #%env/templates/footer.template%#
+
+
diff --git a/htroot/CrawlStartScanner_p.java b/htroot/CrawlStartScanner_p.java
new file mode 100644
index 000000000..083f5b781
--- /dev/null
+++ b/htroot/CrawlStartScanner_p.java
@@ -0,0 +1,179 @@
+/**
+ * CrawlStartScanner_p
+ * Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
+ * First released 12.12.2010 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.protocol.Domains;
+import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.protocol.Scanner;
+import net.yacy.kelondro.blob.Tables;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.order.Base64Order;
+
+import de.anomic.data.WorkTables;
+import de.anomic.search.Switchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class CrawlStartScanner_p {
+
+    /**
+     * Handles a request for the network scanner page.
+     *
+     * Without request parameters the IP range form and the cached scan result are shown.
+     * With "scanip" or "scanhost" the network of the given address is scanned for
+     * ftp, http, https and smb services and the result replaces Scanner.scancache.
+     * With "crawl" a crawl start is issued through the API for every marked server.
+     *
+     * @param header the request header (not used here)
+     * @param post   the request parameters, or null if the request had no query part
+     * @param env    the server environment; it is cast to Switchboard
+     * @return the template properties for the scanner page
+     */
+    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
+
+        final serverObjects prop = new serverObjects();
+        final Switchboard sb = (Switchboard) env;
+
+        // every template alternative is off until one of the cases below enables it
+        prop.put("selectiprange", 0);
+        prop.put("noserverdetected", 0);
+        prop.put("enterrange", 0);
+        prop.put("servertable", 0);
+
+        // pre-fill the range form with an address of this host
+        addSelectIPRange(sb, prop);
+
+        // case: no query part of the request; ask for input
+        if (post == null) {
+            addScantable(sb, prop);
+            prop.put("selectiprange", 1);
+            return prop;
+        }
+
+        // case: an IP range was given; scan the range for services and display result
+        if (post.containsKey("scanip") || post.containsKey("scanhost")) {
+            try {
+                final InetAddress ia;
+                if (post.containsKey("scanip")) {
+                    ia = InetAddress.getByAddress(new byte[]{(byte) post.getInt("ip4-0", 0), (byte) post.getInt("ip4-1", 0), (byte) post.getInt("ip4-2", 0), (byte) post.getInt("ip4-3", 0)});
+                } else {
+                    ia = InetAddress.getByName(post.get("host", ""));
+                }
+                // show the scanned address in the form instead of the local default
+                addSelectIPRange(ia, prop);
+                final Scanner scanner = new Scanner(ia, 100, sb.isIntranetMode() ? 100 : 3000);
+                scanner.addFTP(false);
+                scanner.addHTTP(false);
+                scanner.addHTTPS(false);
+                scanner.addSMB(false);
+                scanner.start();
+                scanner.terminate();
+                Scanner.scancache = scanner.services();
+                if (Scanner.scancache.isEmpty()) {
+                    // tell the user that the scan ran but found nothing
+                    prop.put("noserverdetected", 1);
+                    prop.putHTML("noserverdetected_iprange", ia.getHostAddress());
+                }
+            } catch (UnknownHostException e) {
+                // the host name could not be resolved: log it and report it instead of failing silently
+                Log.logException(e);
+                prop.put("noserverdetected", 1);
+                prop.putHTML("noserverdetected_iprange", post.get("host", ""));
+            }
+        }
+
+        // build the server table exactly once, after a possible scan, so that a scan
+        // without results cannot leave the table of a previous scan in the output
+        addScantable(sb, prop);
+
+        // check crawl request
+        if (post.containsKey("crawl")) {
+            // make a pk/url mapping
+            final Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
+            for (final MultiProtocolURI u: Scanner.scancache) {
+                final DigestURI uu = new DigestURI(u);
+                pkmap.put(uu.hash(), uu);
+            }
+            // search for crawl start requests in this mapping
+            for (final Map.Entry<String, String> entry: post.entrySet()) {
+                final String value = entry.getValue();
+                if (value == null || !value.startsWith("mark_")) continue;
+                // the part after "mark_" is the url hash that was written into the table as pk
+                final byte[] pk = value.substring(5).getBytes();
+                final DigestURI url = pkmap.get(pk);
+                if (url == null) continue;
+                String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
+                path += "&crawlingURL=" + url.toNormalform(true, false);
+                WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, pk);
+            }
+        }
+
+        return prop;
+    }
+
+ private static void addSelectIPRange(Switchboard sb, serverObjects prop) {
+ InetAddress ip;
+ if (sb.isIntranetMode()) {
+ List ips = Domains.myIntranetIPs();
+ if (ips.size() > 0) ip = ips.get(0); else try {
+ ip = InetAddress.getByName("192.168.0.1");
+ } catch (UnknownHostException e) {
+ ip = null;
+ e.printStackTrace();
+ }
+ } else {
+ ip = Domains.myPublicLocalIP();
+ }
+ addSelectIPRange(ip, prop);
+ }
+
+ private static void addSelectIPRange(InetAddress ip, serverObjects prop) {
+ prop.put("enterrange", 1);
+ byte[] address = ip.getAddress();
+ prop.put("enterrange_host", "");
+ prop.put("enterrange_ip4-0", 0xff & address[0]);
+ prop.put("enterrange_ip4-1", 0xff & address[1]);
+ prop.put("enterrange_ip4-2", 0xff & address[2]);
+ }
+
+ private static void addScantable(Switchboard sb, serverObjects prop) {
+ if (Scanner.scancache.size() > 0) {
+ // show scancache table
+ prop.put("servertable", 1);
+ int i = 0;
+ String urlString;
+ DigestURI u;
+ for (final MultiProtocolURI url: Scanner.scancache) {
+ u = new DigestURI(url);
+ urlString = u.toNormalform(true, false);
+ prop.put("servertable_list_" + i + "_pk", new String(u.hash()));
+ prop.put("servertable_list_" + i + "_count", i);
+ prop.putHTML("servertable_list_" + i + "_protocol", u.getProtocol());
+ prop.putHTML("servertable_list_" + i + "_ip", Domains.dnsResolve(u.getHost()).getHostAddress());
+ prop.putHTML("servertable_list_" + i + "_url", urlString);
+ prop.put("servertable_list_" + i + "_process", inIndex(sb, urlString) == null ? 0 : 1);
+ i++;
+ }
+ prop.put("servertable_list", i);
+ prop.put("servertable_num", i);
+ }
+ }
+
+ private static byte[] inIndex(Switchboard sb, String url) {
+ Iterator i;
+ try {
+ i = sb.tables.iterator(WorkTables.TABLE_API_NAME);
+ Tables.Row row;
+ String comment;
+ while (i.hasNext()) {
+ row = i.next();
+ comment = new String(row.get(WorkTables.TABLE_API_COL_COMMENT));
+ if (comment.contains(url)) return row.getPK();
+ }
+ return null;
+ } catch (IOException e) {
+ Log.logException(e);
+ return null;
+ }
+ }
+
+}
diff --git a/htroot/env/templates/submenuIndexCreate.template b/htroot/env/templates/submenuIndexCreate.template
index 3fb9d82e2..f5daf42fe 100644
--- a/htroot/env/templates/submenuIndexCreate.template
+++ b/htroot/env/templates/submenuIndexCreate.template
@@ -7,7 +7,7 @@
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 411424fd7..18ffe3a36 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -230,7 +230,6 @@ public final class Switchboard extends serverSwitch {
public LinkedBlockingQueue trail;
public yacySeedDB peers;
public WorkTables tables;
- public SortedMap intranetURLs = new TreeMap(Base64Order.enhancedCoder);
public WorkflowProcessor indexingDocumentProcessor;
public WorkflowProcessor indexingCondensementProcessor;
diff --git a/source/net/yacy/cora/protocol/Scanner.java b/source/net/yacy/cora/protocol/Scanner.java
index 27e9d6030..5eef48f7e 100644
--- a/source/net/yacy/cora/protocol/Scanner.java
+++ b/source/net/yacy/cora/protocol/Scanner.java
@@ -36,7 +36,9 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
+import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.order.Base64Order;
/**
* a protocol scanner
@@ -47,21 +49,38 @@ public class Scanner extends Thread {
private static final MultiProtocolURI POISONURI = new MultiProtocolURI();
private static final Object PRESENT = new Object();
+ public static Map intranetURLs = new TreeMap(Base64Order.enhancedCoder); // deprecated
+ public static Collection scancache = new ArrayList(1);
+
private int runnerCount;
- private List a;
+ private List scanrange;
private BlockingQueue scanqueue;
private Map services;
private Map runner;
private int timeout;
-
- public Scanner(int concurrentRunner, int timeout) {
+
+ public Scanner(InetAddress scanrange, int concurrentRunner, int timeout) {
this.runnerCount = concurrentRunner;
- this.a = Domains.myIntranetIPs();
+ this.scanrange = new ArrayList();
+ this.scanrange.add(scanrange);
this.scanqueue = new LinkedBlockingQueue();
this.services = Collections.synchronizedMap(new TreeMap());
this.runner = new ConcurrentHashMap();
this.timeout = timeout;
}
+
+ /**
+  * Creates a scanner for the given list of base addresses.
+  * The list is stored as it is (not copied); the queue, the service map and the
+  * runner map start empty.
+  * @param scanrange the base addresses that define what is scanned
+  * @param concurrentRunner stored as runner count; presumably the number of concurrent scan threads - TODO confirm
+  * @param timeout stored as timeout; presumably milliseconds per connection attempt - TODO confirm
+  */
+ public Scanner(List scanrange, int concurrentRunner, int timeout) {
+ this.runnerCount = concurrentRunner;
+ this.scanrange = scanrange;
+ this.scanqueue = new LinkedBlockingQueue();
+ this.services = Collections.synchronizedMap(new TreeMap());
+ this.runner = new ConcurrentHashMap();
+ this.timeout = timeout;
+ }
+
+ /**
+  * Creates a scanner whose scan range is the list returned by Domains.myIntranetIPs().
+  * @param concurrentRunner passed on unchanged to the list-based constructor
+  * @param timeout passed on unchanged to the list-based constructor
+  */
+ public Scanner(int concurrentRunner, int timeout) {
+ this(Domains.myIntranetIPs(), concurrentRunner, timeout);
+ }
public void run() {
MultiProtocolURI uri;
@@ -81,53 +100,6 @@ public class Scanner extends Thread {
Log.logException(e);
}
}
-
- private final List genlist(boolean bigrange) {
- ArrayList c = new ArrayList(10);
- for (InetAddress i: a) {
- for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) {
- for (int j = 1; j < 255; j++) {
- byte[] address = i.getAddress();
- address[2] = (byte) br;
- address[3] = (byte) j;
- try {
- c.add(InetAddress.getByAddress(address));
- } catch (UnknownHostException e) {
- }
- }
- }
- }
- return c;
- }
-
- public void addHTTP(boolean bigrange) {
- addProtocol("http", bigrange);
- }
-
- public void addHTTPS(boolean bigrange) {
- addProtocol("https", bigrange);
- }
-
- public void addSMB(boolean bigrange) {
- addProtocol("smb", bigrange);
- }
-
- public void addFTP(boolean bigrange) {
- addProtocol("ftp", bigrange);
- }
-
- private void addProtocol(String protocol, boolean bigrange) {
- for (InetAddress i: genlist(bigrange)) {
- try {
-
- this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/"));
- } catch (MalformedURLException e) {
- Log.logException(e);
- } catch (InterruptedException e) {
- Log.logException(e);
- }
- }
- }
public int pending() {
return this.scanqueue.size();
@@ -178,6 +150,51 @@ public class Scanner extends Thread {
}
}
+ /**
+  * Queues http URLs for all addresses of the scan range; delegates to addProtocol("http", bigrange).
+  * @param bigrange if true the third address octet is varied from 1 to 254 in addition to the last one
+  */
+ public void addHTTP(boolean bigrange) {
+ addProtocol("http", bigrange);
+ }
+
+ /**
+  * Queues https URLs for all addresses of the scan range; delegates to addProtocol("https", bigrange).
+  * @param bigrange if true the third address octet is varied from 1 to 254 in addition to the last one
+  */
+ public void addHTTPS(boolean bigrange) {
+ addProtocol("https", bigrange);
+ }
+
+ /**
+  * Queues smb URLs for all addresses of the scan range; delegates to addProtocol("smb", bigrange).
+  * @param bigrange if true the third address octet is varied from 1 to 254 in addition to the last one
+  */
+ public void addSMB(boolean bigrange) {
+ addProtocol("smb", bigrange);
+ }
+
+ /**
+  * Queues ftp URLs for all addresses of the scan range; delegates to addProtocol("ftp", bigrange).
+  * @param bigrange if true the third address octet is varied from 1 to 254 in addition to the last one
+  */
+ public void addFTP(boolean bigrange) {
+ addProtocol("ftp", bigrange);
+ }
+
+    /**
+     * Queues one URL of the form protocol://address/ for every address produced by genlist().
+     * A malformed URL is logged and skipped. If the thread is interrupted while queueing,
+     * the interruption is logged, the interrupt flag is set again and queueing stops.
+     *
+     * @param protocol the URL scheme, e.g. "http"
+     * @param bigrange passed on to genlist() to select the size of the address range
+     */
+    private void addProtocol(String protocol, boolean bigrange) {
+        for (InetAddress i: genlist(bigrange)) {
+            try {
+                this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/"));
+            } catch (MalformedURLException e) {
+                Log.logException(e);
+            } catch (InterruptedException e) {
+                Log.logException(e);
+                // catching the exception cleared the interrupt flag; set it again so that
+                // the caller can see the interruption, and do not keep filling the queue
+                Thread.currentThread().interrupt();
+                return;
+            }
+        }
+    }
+
+    /**
+     * Generates the addresses to scan from the base addresses of the scan range.
+     * For every base address the last octet is varied from 1 to 254. With bigrange the
+     * third octet is varied from 1 to 254 as well, otherwise the third octet of the base
+     * address is kept. Base addresses that are not four bytes long are skipped, because
+     * the octet positions used here only have this meaning for IPv4 addresses.
+     *
+     * @param bigrange true to vary the third octet in addition to the last one
+     * @return the generated addresses; empty if the scan range holds no usable address
+     */
+    private final List<InetAddress> genlist(boolean bigrange) {
+        final List<InetAddress> c = new ArrayList<InetAddress>();
+        for (InetAddress i: scanrange) {
+            // getAddress() is read once per base address; every generated address works on its own copy
+            final byte[] base = i.getAddress();
+            if (base.length != 4) continue;
+            // mask the signed byte so that the octet is handled as 0..255
+            final int firstNet = bigrange ? 1 : (0xff & base[2]);
+            final int lastNet = bigrange ? 254 : firstNet;
+            for (int br = firstNet; br <= lastNet; br++) {
+                for (int j = 1; j < 255; j++) {
+                    final byte[] address = base.clone();
+                    address[2] = (byte) br;
+                    address[3] = (byte) j;
+                    try {
+                        c.add(InetAddress.getByAddress(address));
+                    } catch (UnknownHostException e) {
+                        // only thrown for an illegal address length, which is excluded above
+                    }
+                }
+            }
+        }
+        return c;
+    }
public Collection services() {
return this.services.keySet();