@@ -18,7 +18,6 @@
  * If not, see <http://www.gnu.org/licenses/>.
  */

 import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.util.ConcurrentModificationException;
@@ -42,11 +41,15 @@ import de.anomic.data.WorkTables;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;

-public class CrawlStartScanner_p {
+public class CrawlStartScanner_p
+{
     private final static int CONCURRENT_RUNNER = 100;

-    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
+    public static serverObjects respond(
+        final RequestHeader header,
+        final serverObjects post,
+        final serverSwitch env) {
         final serverObjects prop = new serverObjects();
         final Switchboard sb = (Switchboard) env;
@@ -69,13 +72,20 @@ public class CrawlStartScanner_p {
         if (hosts.length() == 0) {
             InetAddress ip;
             if (sb.isIntranetMode()) {
-                if (ips.size() > 0) ip = ips.iterator().next();
-                else ip = Domains.dnsResolve("192.168.0.1");
+                if (ips.size() > 0) {
+                    ip = ips.iterator().next();
+                } else {
+                    ip = Domains.dnsResolve("192.168.0.1");
+                }
             } else {
                 ip = Domains.myPublicLocalIP();
-                if (Domains.isThisHostIP(ip)) ip = sb.peers.mySeed().getInetAddress();
+                if (Domains.isThisHostIP(ip)) {
+                    ip = sb.peers.mySeed().getInetAddress();
+                }
             }
-            if (ip != null) hosts = ip.getHostAddress();
+            if (ip != null) {
+                hosts = ip.getHostAddress();
+            }
         }
         prop.put("scanhosts", hosts);
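The default-host fallback in this hunk reduces to one small decision tree. A minimal sketch, assuming only the Domains and Switchboard calls already used above; the helper name is hypothetical and not part of the patch:

    // Hypothetical helper, not in the patch: summarizes the fallback above.
    private static InetAddress defaultScanAddress(final Switchboard sb, final Set<InetAddress> ips) {
        if (sb.isIntranetMode()) {
            // intranet mode: first known local interface address, else a common gateway
            return ips.isEmpty() ? Domains.dnsResolve("192.168.0.1") : ips.iterator().next();
        }
        final InetAddress ip = Domains.myPublicLocalIP();
        // internet mode: the public IP, unless that is this host itself, then the peer seed's address
        return Domains.isThisHostIP(ip) ? sb.peers.mySeed().getInetAddress() : ip;
    }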
@@ -89,33 +99,58 @@ public class CrawlStartScanner_p {
         if (post.get("rescan", "").equals("scheduler")) {
             repeat_time = post.getInt("repeat_time", -1);
             repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays
-            if (repeat_unit.equals("selminutes")) validTime = repeat_time * 60 * 1000;
-            if (repeat_unit.equals("selhours")) validTime = repeat_time * 60 * 60 * 1000;
-            if (repeat_unit.equals("seldays")) validTime = repeat_time * 24 * 60 * 60 * 1000;
+            if (repeat_unit.equals("selminutes")) {
+                validTime = repeat_time * 60 * 1000;
+            }
+            if (repeat_unit.equals("selhours")) {
+                validTime = repeat_time * 60 * 60 * 1000;
+            }
+            if (repeat_unit.equals("seldays")) {
+                validTime = repeat_time * 24 * 60 * 60 * 1000;
+            }
         }

-        final boolean bigrange = post.getBoolean("bigrange", false);
+        final boolean bigrange = post.get("subnet", "24").equals("16");

         // case: an IP range was given; scan the range for services and display result
         if (post.containsKey("scan") && "hosts".equals(post.get("source", ""))) {
             final Set<InetAddress> ia = new HashSet<InetAddress>();
             for (String host : hosts.split(",")) {
-                if (host.startsWith("http://")) host = host.substring(7);
-                if (host.startsWith("https://")) host = host.substring(8);
-                if (host.startsWith("ftp://")) host = host.substring(6);
-                if (host.startsWith("smb://")) host = host.substring(6);
+                if (host.startsWith("http://")) {
+                    host = host.substring(7);
+                }
+                if (host.startsWith("https://")) {
+                    host = host.substring(8);
+                }
+                if (host.startsWith("ftp://")) {
+                    host = host.substring(6);
+                }
+                if (host.startsWith("smb://")) {
+                    host = host.substring(6);
+                }
                 final int p = host.indexOf('/', 0);
-                if (p >= 0) host = host.substring(0, p);
+                if (p >= 0) {
+                    host = host.substring(0, p);
+                }
                 ia.add(Domains.dnsResolve(host));
             }
             final Scanner scanner = new Scanner(ia, CONCURRENT_RUNNER, timeout);
-            if (post.get("scanftp", "").equals("on")) scanner.addFTP(bigrange);
-            if (post.get("scanhttp", "").equals("on")) scanner.addHTTP(bigrange);
-            if (post.get("scanhttps", "").equals("on")) scanner.addHTTPS(bigrange);
-            if (post.get("scansmb", "").equals("on")) scanner.addSMB(bigrange);
+            if (post.get("scanftp", "").equals("on")) {
+                scanner.addFTP(bigrange);
+            }
+            if (post.get("scanhttp", "").equals("on")) {
+                scanner.addHTTP(bigrange);
+            }
+            if (post.get("scanhttps", "").equals("on")) {
+                scanner.addHTTPS(bigrange);
+            }
+            if (post.get("scansmb", "").equals("on")) {
+                scanner.addSMB(bigrange);
+            }
             scanner.start();
             scanner.terminate();
-            if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
+            if ("on".equals(post.get("accumulatescancache", ""))
+                && !"scheduler".equals(post.get("rescan", ""))) {
                 Scanner.scancacheExtend(scanner, validTime);
             } else {
                 Scanner.scancacheReplace(scanner, validTime);
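Two things in the hunk above deserve a note. First, the bigrange line is a behavior change, not reformatting: the flag is now derived from the subnet form value (16 versus the default 24) instead of a dedicated bigrange field. Second, the interval conversion runs in int arithmetic, so repeat_time * 24 * 60 * 60 * 1000 overflows for intervals beyond roughly 24 days. A minimal overflow-safe sketch (hypothetical helper, not part of the patch):

    // Hypothetical helper, not in the patch: widen to long before multiplying
    // so large repeat_time values cannot overflow int arithmetic.
    private static long intervalMillis(final int repeatTime, final String unit) {
        if ("selminutes".equals(unit)) return repeatTime * 60L * 1000L;
        if ("selhours".equals(unit)) return repeatTime * 60L * 60L * 1000L;
        if ("seldays".equals(unit)) return repeatTime * 24L * 60L * 60L * 1000L;
        return -1L; // unknown unit: caller keeps its default
    }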
@@ -124,13 +159,22 @@ public class CrawlStartScanner_p {

         if (post.containsKey("scan") && "intranet".equals(post.get("source", ""))) {
             final Scanner scanner = new Scanner(Domains.myIntranetIPs(), CONCURRENT_RUNNER, timeout);
-            if ("on".equals(post.get("scanftp", ""))) scanner.addFTP(bigrange);
-            if ("on".equals(post.get("scanhttp", ""))) scanner.addHTTP(bigrange);
-            if ("on".equals(post.get("scanhttps", ""))) scanner.addHTTPS(bigrange);
-            if ("on".equals(post.get("scansmb", ""))) scanner.addSMB(bigrange);
+            if ("on".equals(post.get("scanftp", ""))) {
+                scanner.addFTP(bigrange);
+            }
+            if ("on".equals(post.get("scanhttp", ""))) {
+                scanner.addHTTP(bigrange);
+            }
+            if ("on".equals(post.get("scanhttps", ""))) {
+                scanner.addHTTPS(bigrange);
+            }
+            if ("on".equals(post.get("scansmb", ""))) {
+                scanner.addSMB(bigrange);
+            }
             scanner.start();
             scanner.terminate();
-            if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
+            if ("on".equals(post.get("accumulatescancache", ""))
+                && !"scheduler".equals(post.get("rescan", ""))) {
                 Scanner.scancacheExtend(scanner, validTime);
             } else {
                 Scanner.scancacheReplace(scanner, validTime);
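The scan-cache commit is duplicated verbatim between the host-range branch and the intranet branch above. A sketch of a shared helper both call sites could use (hypothetical, not part of the patch; it relies only on the serverObjects getters and Scanner statics already present):

    // Hypothetical helper, not in the patch: one place for the duplicated
    // accumulate-vs-replace decision from both scan branches.
    private static void commitScanCache(final Scanner scanner, final serverObjects post, final long validTime) {
        final boolean accumulate = "on".equals(post.get("accumulatescancache", ""))
            && !"scheduler".equals(post.get("rescan", ""));
        if (accumulate) {
            Scanner.scancacheExtend(scanner, validTime);  // merge into the existing cache
        } else {
            Scanner.scancacheReplace(scanner, validTime); // start a fresh cache
        }
    }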
@@ -141,7 +185,8 @@ public class CrawlStartScanner_p {
         if (post.containsKey("crawl")) {
             // make a pk/url mapping
             final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se = Scanner.scancacheEntries();
-            final Map<byte[], DigestURI> pkmap = new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
+            final Map<byte[], DigestURI> pkmap =
+                new TreeMap<byte[], DigestURI>(Base64Order.enhancedCoder);
             while (se.hasNext()) {
                 final Scanner.Service u = se.next().getKey();
                 DigestURI uu;
@@ -158,9 +203,15 @@ public class CrawlStartScanner_p {
                 final byte[] pk = entry.getValue().substring(5).getBytes();
                 final DigestURI url = pkmap.get(pk);
                 if (url != null) {
-                    String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
+                    String path =
+                        "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                     path += "&crawlingURL=" + url.toNormalform(true, false);
-                    WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, pk);
+                    WorkTables.execAPICall(
+                        "localhost",
+                        (int) sb.getConfigLong("port", 8090),
+                        sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""),
+                        path,
+                        pk);
                 }
             }
         }
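In this hunk, and again in the scheduler branch below, the crawl start URL is concatenated into the query string unencoded; a URL containing & or = would mis-split the Crawler_p.html parameters. A hedged hardening sketch (hypothetical, not part of the patch), using java.net.URLEncoder:

    // Hypothetical hardening, not in the patch: percent-encode the crawl
    // start URL before appending it as a query-string parameter.
    String crawlingURL;
    try {
        crawlingURL = java.net.URLEncoder.encode(url.toNormalform(true, false), "UTF-8");
    } catch (final java.io.UnsupportedEncodingException e) {
        crawlingURL = url.toNormalform(true, false); // UTF-8 is guaranteed to exist
    }
    path += "&crawlingURL=" + crawlingURL;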
@@ -172,7 +223,13 @@ public class CrawlStartScanner_p {
         // store this call as api call
         if (repeat_time > 0) {
             // store as scheduled api call
-            sb.tables.recordAPICall(post, "CrawlStartScanner_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "network scanner for hosts: " + hosts, repeat_time, repeat_unit.substring(3));
+            sb.tables.recordAPICall(
+                post,
+                "CrawlStartScanner_p.html",
+                WorkTables.TABLE_API_TYPE_CRAWLER,
+                "network scanner for hosts: " + hosts,
+                repeat_time,
+                repeat_unit.substring(3));
         }

         // execute the scan results
@@ -183,23 +240,32 @@ public class CrawlStartScanner_p {

             String urlString;
             DigestURI u;
             try {
-                final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se = Scanner.scancacheEntries();
+                final Iterator<Map.Entry<Scanner.Service, Scanner.Access>> se =
+                    Scanner.scancacheEntries();
                 Map.Entry<Scanner.Service, Scanner.Access> host;
                 while (se.hasNext()) {
                     host = se.next();
                     try {
                         u = new DigestURI(host.getKey().url());
                         urlString = u.toNormalform(true, false);
-                        if (host.getValue() == Access.granted && Scanner.inIndex(apiCommentCache, urlString) == null) {
-                            String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
+                        if (host.getValue() == Access.granted
+                            && Scanner.inIndex(apiCommentCache, urlString) == null) {
+                            String path =
+                                "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99";
                             path += "&crawlingURL=" + urlString;
-                            WorkTables.execAPICall("localhost", (int) sb.getConfigLong("port", 8090), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), path, u.hash());
+                            WorkTables.execAPICall(
+                                "localhost",
+                                (int) sb.getConfigLong("port", 8090),
+                                sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""),
+                                path,
+                                u.hash());
                         }
                     } catch (final MalformedURLException e) {
                         Log.logException(e);
                     }
                 }
-            } catch (final ConcurrentModificationException e) {}
+            } catch (final ConcurrentModificationException e) {
+            }
         }
     }
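Taken together, both scan branches follow the same Scanner lifecycle. A compact sketch of that sequence as it appears in this file (the addresses variable is a placeholder; the comments are inferred from usage here, not verified against the Scanner source):

    // Scan sequence as used above; comments are assumptions from context.
    final Scanner scanner = new Scanner(addresses, CONCURRENT_RUNNER, timeout);
    scanner.addHTTP(bigrange);  // queue probes for each protocol enabled in the form
    scanner.start();            // run up to CONCURRENT_RUNNER probes concurrently
    scanner.terminate();        // wait until all runners have finished
    Scanner.scancacheReplace(scanner, validTime); // publish results for validTime ms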