added new network scanner

- you can scan any ip or host in the internet for services
- this replaces the intranet scanner

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7371 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 586cbee2bb
commit acab6801d9

@ -53,7 +53,7 @@ public class CrawlStartIntranet_p {
}
// if there are no intranet addresses known, scan the net
if (sb.intranetURLs.size() == 0) {
if (Scanner.intranetURLs.size() == 0) {
Scanner scanner = new Scanner(100, 10);
scanner.addFTP(false);
scanner.addHTTP(false);
@ -64,7 +64,7 @@ public class CrawlStartIntranet_p {
DigestURI url;
for (MultiProtocolURI service: scanner.services()) {
url = new DigestURI(service);
sb.intranetURLs.put(url.hash(), url);
Scanner.intranetURLs.put(url.hash(), url);
}
}
@ -73,7 +73,7 @@ public class CrawlStartIntranet_p {
for (Map.Entry<String, String> entry: post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
byte [] pk = entry.getValue().substring(5).getBytes();
DigestURI url = sb.intranetURLs.get(pk);
DigestURI url = Scanner.intranetURLs.get(pk);
if (url != null) {
String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
path += "&crawlingURL=" + url.toNormalform(true, false);
@ -87,7 +87,7 @@ public class CrawlStartIntranet_p {
prop.put("servertable", 1);
int i = 0;
String urlString;
for (final DigestURI url: sb.intranetURLs.values()) {
for (final DigestURI url: Scanner.intranetURLs.values()) {
urlString = url.toNormalform(true, false);
prop.put("servertable_list_" + i + "_pk", new String(url.hash()));
prop.put("servertable_list_" + i + "_count", i);

@ -0,0 +1,90 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Network Scanner</title>
#%env/templates/metas.template%#
<script type="text/javascript">
<!--
// Inverts the checked state of every "item_<n>" checkbox of the form with the
// given name; the number of candidate checkboxes is read from the form's
// "num" field. Does nothing when the form or its "num" field is missing.
function setall(name) {
    var selectForm = document.forms.namedItem(name);
    if (selectForm == null || selectForm.elements["num"] == null) return;
    var count = parseInt(selectForm.elements["num"].value, 10);
    // 'var' keeps the loop index local; the original leaked it as a global
    for (var i = 0; i < count; i++) {
        var box = selectForm.elements["item_" + i];
        if (box == null) continue;
        box.checked = !box.checked;
    }
}
-->
</script>
<script type="text/javascript" src="/js/sorttable.js"></script>
</head>
<body id="IndexCreate">
#%env/templates/header.template%#
#%env/templates/submenuIndexCreate.template%#
<h2>Network Scanner</h2>
#(selectiprange)#::
<p>
YaCy can scan a network segment for available http, https, ftp and smb servers.
You must first select an IP range; after this range has been scanned,
you can select the servers that were found for a full-site crawl.
</p>
#(/selectiprange)#
#(noserverdetected)#::
<p>
No servers were detected in the given IP range #[iprange]#. Please enter a different IP range for another scan.
</p>
#(/noserverdetected)#
#(enterrange)#::
<p>
<form id="enterrange" name="enterrange" action="CrawlStartScanner_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8" ><fieldset>
<legend><label for="servertable">Enter IP Range for Scanner</label></legend>
<input type="text" name="ip4-0" value="#[ip4-0]#" size="3" maxlength="3" />.
<input type="text" name="ip4-1" value="#[ip4-1]#" size="3" maxlength="3" />.
<input type="text" name="ip4-2" value="#[ip4-2]#" size="3" maxlength="3" />.[1-254]
<input type="submit" name="scanip" value="Scan this IP range for services" />
</fieldset></form>
<form id="enterhost" name="enterhost" action="CrawlStartScanner_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8" ><fieldset>
<legend><label for="servertable">Enter Host Name for Scanner</label></legend>
<input type="text" name="host" value="#[host]#" size="28" maxlength="60" />
<input type="submit" name="scanhost" value="Scan this Host for services" />
</fieldset></form>
</p>
#(/enterrange)#
#(servertable)#::
<p>
The following servers had been detected:
</p>
<form id="servertable" name="servertable" action="CrawlStartScanner_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8" ><fieldset>
<legend><label for="servertable">Available server within the given IP range</label></legend>
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td><input type="checkbox" name="allswitch" onclick="setall(this.form.name)" /></td>
<td>Protocol</td>
<td>IP</td>
<td>URL</td>
<td>Process</td>
</tr>
#{list}#
<tr class="TableCellLight">
<td align="left"><input type="checkbox" name="item_#[count]#" value="mark_#[pk]#" /></td>
<td>#[protocol]#</td>
<td><a href="#[url]#">#[ip]#</a></td>
<td><a href="#[url]#">#[url]#</a></td>
#(process)#<td class="error">not in index</td>::<td class="commit">indexed</td>#(/process)#
</tr>
#{/list}#
</table>
<p>
<input type="hidden" name="num" value="#[num]#" />
<input type="submit" name="crawl" value="Add Selected Servers to Crawler" />
</p>
</fieldset></form>
#(/servertable)#
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,179 @@
/**
* CrawlStartScanner_p
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 12.12.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.Scanner;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import de.anomic.data.WorkTables;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind CrawlStartScanner_p.html: scans the /24 range around an IP
 * address or host for ftp, http, https and smb services, lists what was found
 * and starts full-site crawls for the servers the user selected.
 */
public class CrawlStartScanner_p {

    /** Crawler_p.html API call that starts a domain crawl; "&crawlingURL=..." is appended per server. */
    private static final String CRAWL_START_PATH = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";

    /** Address used to pre-fill the range form when no usable IPv4 address is known (192.168.0.1). */
    private static final byte[] FALLBACK_ADDRESS = {(byte) 192, (byte) 168, 0, 1};

    /**
     * Builds the template properties for the network scanner page.
     *
     * @param header request header (not used)
     * @param post   request parameters, or null when the page is opened without a query
     * @param env    the server switch; cast to {@link Switchboard}
     * @return the properties that drive the sections of CrawlStartScanner_p.html
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        // all optional sections are off by default
        prop.put("selectiprange", 0);
        prop.put("noserverdetected", 0);
        prop.put("enterrange", 0);
        prop.put("servertable", 0);

        // always offer the input form and show the result of the latest scan
        addSelectIPRange(sb, prop);
        addScantable(sb, prop);

        // case: no query part of the request; ask for input
        if (post == null) {
            prop.put("selectiprange", 1);
            return prop;
        }

        // case: an IP range or host was given; scan it for services and display the result
        if (post.containsKey("scanip") || post.containsKey("scanhost")) {
            scan(sb, post, prop);
        }

        // case: the user selected servers from the table; start crawls for them
        if (post.containsKey("crawl")) {
            startCrawls(sb, post);
        }

        return prop;
    }

    /**
     * Runs a scan for the address or host given in the request, replaces
     * {@code Scanner.scancache} with the result and refreshes the page properties.
     * When the host cannot be resolved or nothing is found, the
     * "noserverdetected" section is switched on.
     */
    private static void scan(final Switchboard sb, final serverObjects post, final serverObjects prop) {
        final String host = post.get("host", "");
        InetAddress ia;
        try {
            if (post.containsKey("scanip")) {
                // the form only submits the first three octets; "ip4-3" falls back to 0
                ia = InetAddress.getByAddress(new byte[]{
                        (byte) post.getInt("ip4-0", 0),
                        (byte) post.getInt("ip4-1", 0),
                        (byte) post.getInt("ip4-2", 0),
                        (byte) post.getInt("ip4-3", 0)});
            } else {
                ia = InetAddress.getByName(host);
            }
        } catch (UnknownHostException e) {
            // do not fail silently: log the problem and tell the user that nothing was found
            Log.logException(e);
            prop.put("noserverdetected", 1);
            prop.putHTML("noserverdetected_iprange", host);
            return;
        }

        // pre-fill the form with the range that is being scanned
        addSelectIPRange(ia, prop);

        final Scanner scanner = new Scanner(ia, 100, sb.isIntranetMode() ? 100 : 3000);
        scanner.addFTP(false);
        scanner.addHTTP(false);
        scanner.addHTTPS(false);
        scanner.addSMB(false);
        scanner.start();
        // NOTE(review): presumably terminate() waits until all runners have finished - confirm in Scanner
        scanner.terminate();
        Scanner.scancache = scanner.services();

        addScantable(sb, prop);
        if (Scanner.scancache.isEmpty()) {
            prop.put("noserverdetected", 1);
            prop.putHTML("noserverdetected_iprange", ia.getHostAddress());
        }
    }

    /**
     * Starts a crawl through the local API for every request value of the form
     * "mark_&lt;url hash&gt;" whose hash belongs to a URL in {@code Scanner.scancache}.
     */
    private static void startCrawls(final Switchboard sb, final serverObjects post) {
        // make a pk/url mapping
        final Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
        for (final MultiProtocolURI u : Scanner.scancache) {
            final DigestURI uu = new DigestURI(u);
            pkmap.put(uu.hash(), uu);
        }

        // search for crawl start requests in this mapping
        for (final Map.Entry<String, String> entry : post.entrySet()) {
            final String value = entry.getValue();
            if (value == null || !value.startsWith("mark_")) {
                continue;
            }
            final byte[] pk = value.substring(5).getBytes();
            final DigestURI url = pkmap.get(pk);
            if (url == null) {
                continue;
            }
            final String path = CRAWL_START_PATH + "&crawlingURL=" + url.toNormalform(true, false);
            WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8080), sb.getConfig("adminAccountBase64MD5", ""), path, pk);
        }
    }

    /**
     * Pre-fills the range form with an address of this peer: the first intranet
     * address in intranet mode, otherwise the public local address. When no
     * address is available the form falls back to 192.168.0.
     */
    private static void addSelectIPRange(final Switchboard sb, final serverObjects prop) {
        InetAddress ip = null;
        if (sb.isIntranetMode()) {
            final List<InetAddress> ips = Domains.myIntranetIPs();
            if (ips.size() > 0) {
                ip = ips.get(0);
            }
        } else {
            ip = Domains.myPublicLocalIP();
        }
        addSelectIPRange(ip, prop);
    }

    /**
     * Switches the "enterrange" section on and fills its three octet fields
     * from the given address.
     *
     * @param ip   address to take the octets from; null or a non-IPv4 address
     *             selects the fallback 192.168.0
     * @param prop properties to write to
     */
    private static void addSelectIPRange(final InetAddress ip, final serverObjects prop) {
        byte[] address = (ip == null) ? null : ip.getAddress();
        if (address == null || address.length != 4) {
            // the form has IPv4 octet fields only
            address = FALLBACK_ADDRESS;
        }
        prop.put("enterrange", 1);
        prop.put("enterrange_host", "");
        prop.put("enterrange_ip4-0", 0xff & address[0]);
        prop.put("enterrange_ip4-1", 0xff & address[1]);
        prop.put("enterrange_ip4-2", 0xff & address[2]);
    }

    /**
     * Writes one table row per URL in {@code Scanner.scancache}. When the cache
     * is empty the table is switched off, so that rows written from an earlier
     * cache content during the same request are not displayed.
     */
    private static void addScantable(final Switchboard sb, final serverObjects prop) {
        if (Scanner.scancache.isEmpty()) {
            prop.put("servertable", 0);
            return;
        }

        // show scancache table
        prop.put("servertable", 1);
        int i = 0;
        for (final MultiProtocolURI url : Scanner.scancache) {
            final DigestURI u = new DigestURI(url);
            final String urlString = u.toNormalform(true, false);
            final String row = "servertable_list_" + i + "_";
            // fall back to the host name if the address cannot be resolved
            final InetAddress resolved = Domains.dnsResolve(u.getHost());
            prop.put(row + "pk", new String(u.hash()));
            prop.put(row + "count", i);
            prop.putHTML(row + "protocol", u.getProtocol());
            prop.putHTML(row + "ip", resolved == null ? u.getHost() : resolved.getHostAddress());
            prop.putHTML(row + "url", urlString);
            prop.put(row + "process", inIndex(sb, urlString) == null ? 0 : 1);
            i++;
        }
        prop.put("servertable_list", i);
        prop.put("servertable_num", i);
    }

    /**
     * Looks for a row in the API work table whose comment contains the given url.
     *
     * @return the primary key of the first matching row, or null when there is
     *         no such row or the table cannot be read
     */
    private static byte[] inIndex(final Switchboard sb, final String url) {
        try {
            final Iterator<Tables.Row> rows = sb.tables.iterator(WorkTables.TABLE_API_NAME);
            while (rows.hasNext()) {
                final Tables.Row row = rows.next();
                final String comment = new String(row.get(WorkTables.TABLE_API_COL_COMMENT));
                if (comment.contains(url)) {
                    return row.getPK();
                }
            }
        } catch (IOException e) {
            Log.logException(e);
        }
        return null;
    }
}

@ -7,7 +7,7 @@
<ul class="SubMenu">
<li><a href="/CrawlStartSite_p.html" class="MenuItemLink lock">Full Site Crawl/<br/>Sitemap Loader</a></li>
<li><a href="/CrawlStartExpert_p.html" class="MenuItemLink lock">Crawl Start<br/>(Expert)</a></li>
<li><a href="/CrawlStartIntranet_p.html" class="MenuItemLink lock">Intranet<br/>Scanner</a></li>
<li><a href="/CrawlStartScanner_p.html" class="MenuItemLink lock">Network<br/>Scanner</a></li>
<li><a href="/Load_MediawikiWiki.html" class="MenuItemLink">Crawling of<br/>Media Wikis</a></li>
<li><a href="/Load_PHPBB3.html" class="MenuItemLink">Crawling of<br/>phpBB3 Forums</a></li>
</ul>

@ -230,7 +230,6 @@ public final class Switchboard extends serverSwitch {
public LinkedBlockingQueue<String> trail;
public yacySeedDB peers;
public WorkTables tables;
public SortedMap<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
public WorkflowProcessor<indexingQueueEntry> indexingDocumentProcessor;
public WorkflowProcessor<indexingQueueEntry> indexingCondensementProcessor;

@ -36,7 +36,9 @@ import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
/**
* a protocol scanner
@ -47,21 +49,38 @@ public class Scanner extends Thread {
private static final MultiProtocolURI POISONURI = new MultiProtocolURI();
private static final Object PRESENT = new Object();
public static Map<byte[], DigestURI> intranetURLs = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder); // deprecated
public static Collection<MultiProtocolURI> scancache = new ArrayList<MultiProtocolURI>(1);
private int runnerCount;
private List<InetAddress> a;
private List<InetAddress> scanrange;
private BlockingQueue<MultiProtocolURI> scanqueue;
private Map<MultiProtocolURI, String> services;
private Map<Runner, Object> runner;
private int timeout;
public Scanner(int concurrentRunner, int timeout) {
public Scanner(InetAddress scanrange, int concurrentRunner, int timeout) {
this.runnerCount = concurrentRunner;
this.a = Domains.myIntranetIPs();
this.scanrange = new ArrayList<InetAddress>();
this.scanrange.add(scanrange);
this.scanqueue = new LinkedBlockingQueue<MultiProtocolURI>();
this.services = Collections.synchronizedMap(new TreeMap<MultiProtocolURI, String>());
this.runner = new ConcurrentHashMap<Runner, Object>();
this.timeout = timeout;
}
/**
 * Creates a scanner for the given addresses. The list is stored as passed in
 * (not copied); the scan queue, the service map and the runner map start empty.
 *
 * @param scanrange        addresses that define the ranges to scan
 * @param concurrentRunner stored as the runner count
 * @param timeout          stored as the timeout; the unit is not visible here (TODO confirm)
 */
public Scanner(List<InetAddress> scanrange, int concurrentRunner, int timeout) {
    this.scanrange = scanrange;
    this.runnerCount = concurrentRunner;
    this.timeout = timeout;
    this.scanqueue = new LinkedBlockingQueue<MultiProtocolURI>();
    this.runner = new ConcurrentHashMap<Runner, Object>();
    this.services = Collections.synchronizedMap(new TreeMap<MultiProtocolURI, String>());
}
/**
 * Creates a scanner whose scan range is the list returned by
 * Domains.myIntranetIPs().
 *
 * @param concurrentRunner forwarded unchanged to the list-based constructor
 * @param timeout          forwarded unchanged to the list-based constructor
 */
public Scanner(int concurrentRunner, int timeout) {
this(Domains.myIntranetIPs(), concurrentRunner, timeout);
}
public void run() {
MultiProtocolURI uri;
@ -81,53 +100,6 @@ public class Scanner extends Thread {
Log.logException(e);
}
}
private final List<InetAddress> genlist(boolean bigrange) {
ArrayList<InetAddress> c = new ArrayList<InetAddress>(10);
for (InetAddress i: a) {
for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) {
for (int j = 1; j < 255; j++) {
byte[] address = i.getAddress();
address[2] = (byte) br;
address[3] = (byte) j;
try {
c.add(InetAddress.getByAddress(address));
} catch (UnknownHostException e) {
}
}
}
}
return c;
}
public void addHTTP(boolean bigrange) {
addProtocol("http", bigrange);
}
public void addHTTPS(boolean bigrange) {
addProtocol("https", bigrange);
}
public void addSMB(boolean bigrange) {
addProtocol("smb", bigrange);
}
public void addFTP(boolean bigrange) {
addProtocol("ftp", bigrange);
}
private void addProtocol(String protocol, boolean bigrange) {
for (InetAddress i: genlist(bigrange)) {
try {
this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/"));
} catch (MalformedURLException e) {
Log.logException(e);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
public int pending() {
return this.scanqueue.size();
@ -178,6 +150,51 @@ public class Scanner extends Thread {
}
}
/**
 * Queues an "http" URL for every address generated from the scan range.
 *
 * @param bigrange handed on to addProtocol, which uses it when generating the address list
 */
public void addHTTP(boolean bigrange) {
    final String protocol = "http";
    addProtocol(protocol, bigrange);
}
/**
 * Queues an "https" URL for every address generated from the scan range.
 *
 * @param bigrange handed on to addProtocol, which uses it when generating the address list
 */
public void addHTTPS(boolean bigrange) {
    final String protocol = "https";
    addProtocol(protocol, bigrange);
}
/**
 * Queues an "smb" URL for every address generated from the scan range.
 *
 * @param bigrange handed on to addProtocol, which uses it when generating the address list
 */
public void addSMB(boolean bigrange) {
    final String protocol = "smb";
    addProtocol(protocol, bigrange);
}
/**
 * Queues an "ftp" URL for every address generated from the scan range.
 *
 * @param bigrange handed on to addProtocol, which uses it when generating the address list
 */
public void addFTP(boolean bigrange) {
    final String protocol = "ftp";
    addProtocol(protocol, bigrange);
}
/**
 * Puts one URL of the form {@code protocol://address/} into the scan queue
 * for every address returned by genlist(bigrange).
 *
 * If the calling thread is interrupted while enqueuing, the exception is
 * logged, the interrupt flag is restored and the remaining addresses are
 * not queued.
 *
 * @param protocol URL scheme to probe, e.g. "http"
 * @param bigrange passed to genlist to select the size of the address list
 */
private void addProtocol(String protocol, boolean bigrange) {
    for (InetAddress i: genlist(bigrange)) {
        try {
            this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostAddress() + "/"));
        } catch (MalformedURLException e) {
            Log.logException(e);
        } catch (InterruptedException e) {
            Log.logException(e);
            // keep the interruption visible to the caller and stop enqueuing
            Thread.currentThread().interrupt();
            return;
        }
    }
}
/**
 * Generates the addresses to probe from the entries of the scan range.
 * For every IPv4 entry a.b.c.d the result contains a.b.c.1 to a.b.c.254;
 * with bigrange the third octet additionally runs from 1 to 254, giving
 * a.b.1.1 to a.b.254.254. Null entries and non-IPv4 entries are skipped,
 * because only the last two bytes of a four-byte address are varied.
 *
 * @param bigrange true to vary the third octet as well
 * @return the generated addresses; empty if the scan range has no IPv4 entry
 */
private final List<InetAddress> genlist(boolean bigrange) {
    ArrayList<InetAddress> c = new ArrayList<InetAddress>(254);
    for (InetAddress i: scanrange) {
        if (i == null) continue;
        final byte[] base = i.getAddress();
        if (base.length != 4) continue; // overwriting bytes 2 and 3 is only meaningful for IPv4
        // read the third octet unsigned so that values above 127 do not turn negative
        final int first = bigrange ? 1 : (base[2] & 0xff);
        final int last = bigrange ? 254 : first;
        for (int br = first; br <= last; br++) {
            for (int j = 1; j < 255; j++) {
                final byte[] address = base.clone();
                address[2] = (byte) br;
                address[3] = (byte) j;
                try {
                    c.add(InetAddress.getByAddress(address));
                } catch (UnknownHostException e) {
                    // only thrown for an illegal array length, which is excluded above
                }
            }
        }
    }
    return c;
}
public Collection<MultiProtocolURI> services() {
return this.services.keySet();

Loading…
Cancel
Save