diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 60c9a8cbb..609ab5fb6 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -170,7 +170,8 @@ public class Crawler_p {
             if (t > 0) deleteageDate = new Date(t);
         }
         final boolean deleteold = (deleteage && deleteageDate != null) || (restrictedcrawl && post.getBoolean("deleteold"));
-
+
+        final String sitemapURLStr = post.get("sitemapURL","");
         String crawlingStart0 = post.get("crawlingURL","").trim(); // the crawljob start url
         String[] rootURLs0 = crawlingStart0.indexOf('\n') > 0 || crawlingStart0.indexOf('\r') > 0 ? crawlingStart0.split("[\\r\\n]+") : crawlingStart0.split(Pattern.quote("|"));
         Set<DigestURL> rootURLs = new HashSet<DigestURL>();
@@ -199,7 +200,7 @@ public class Crawler_p {
             if (p >= 8) crawlName = crawlName.substring(0, p);
         }
         if (crawlName.endsWith(",")) crawlName = crawlName.substring(0, crawlName.length() - 1);
-
+        if (crawlName.length() == 0 && sitemapURLStr.length() > 0) crawlName = "sitemap loader for " + sitemapURLStr;
 
         // set the crawl filter
         String ipMustMatch = post.get("ipMustmatch", CrawlProfile.MATCH_ALL_STRING);
@@ -442,7 +443,6 @@ public class Crawler_p {
                     if (successurls.size() > 0) sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
                 }
             } else if ("sitemap".equals(crawlingMode)) {
-                final String sitemapURLStr = post.get("sitemapURL","");
                 try {
                     final DigestURL sitemapURL = new DigestURL(sitemapURLStr);
                     sb.crawler.putActive(handle, profile);