@@ -247,9 +247,13 @@ public class Crawler_p {
             if (crawlName.length() == 0 && sitemapURLStr.length() > 0) crawlName = "sitemap loader for " + sitemapURLStr;
 
             // delete old robots entries
             for (DigestURL ru: rootURLs) {
                 sb.robots.delete(ru);
-                try {Cache.delete(RobotsTxt.robotsURL(RobotsTxt.getHostPort(ru)).hash());} catch (IOException e) {}
+                try {
+                    if (ru.getHost() != null) { // might be null for file://
+                        Cache.delete(RobotsTxt.robotsURL(RobotsTxt.getHostPort(ru)).hash());
+                    }
+                } catch (IOException e) {}
             }
             try {sb.robots.clear();} catch (IOException e) {} // to be safe: clear all.
 
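For context, the guard added above matches standard URL semantics: `file://` URLs carry no authority component, so the host is `null` (as the inline comment notes) and the unguarded `RobotsTxt.getHostPort(ru)` call could fail. Below is a minimal, self-contained sketch of that behavior using `java.net.URI` as a stand-in for `DigestURL`; the class name and sample URLs are illustrative, not YaCy code:

```java
import java.net.URI;
import java.net.URISyntaxException;

public class FileUrlHostDemo {
    public static void main(String[] args) throws URISyntaxException {
        // http(s) URLs carry a host, so host-derived robots.txt lookups work
        URI web = new URI("http://example.org/page.html");
        System.out.println(web.getHost()); // prints: example.org

        // file URLs have no authority component: getHost() returns null,
        // which is why the patched code checks before calling getHostPort
        URI local = new URI("file:///home/user/page.html");
        System.out.println(local.getHost()); // prints: null
    }
}
```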
@@ -645,7 +649,7 @@ public class Crawler_p {
         prop.put("crawlProfilesShow_linkstructure", 0);
         if (count > 0) {
             // collect the host names for 'wide' crawls which can be visualized
-            boolean showLinkstructure = hosts.length() > 0;
+            boolean showLinkstructure = hosts.length() > 0 && !hosts.contains("file:");
             if (showLinkstructure) {
                 StringBuilder q = new StringBuilder();
                 hosts = hosts.substring(1);
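The second hunk applies the same file-protocol awareness to the link-structure view: a crawl of local `file:` URLs has no host graph to draw, so the visualization is skipped. A rough sketch of the gate, under the assumption (suggested by the later `hosts.substring(1)`) that `hosts` is a separator-prefixed list; the helper name and sample values are hypothetical:

```java
public class LinkstructureGate {
    // Hypothetical stand-in for the gate in the diff: 'hosts' is assumed to
    // be a separator-prefixed list (the later hosts.substring(1) strips it).
    static boolean showLinkstructure(String hosts) {
        return hosts.length() > 0 && !hosts.contains("file:");
    }

    public static void main(String[] args) {
        System.out.println(showLinkstructure(",example.org,example.net")); // true
        System.out.println(showLinkstructure(",file://"));                 // false: no host graph for local files
        System.out.println(showLinkstructure(""));                         // false: nothing crawled 'wide'
    }
}
```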