diff --git a/htroot/CrawlStartSimple_p.html b/htroot/CrawlStartSimple_p.html index 57b85eb1e..9708320d5 100644 --- a/htroot/CrawlStartSimple_p.html +++ b/htroot/CrawlStartSimple_p.html @@ -50,10 +50,13 @@ - : - + : - The depth defines how deep the Crawler will follow links of links (...) and so on. + Wide: depth   |   + Complete Single Domain + + + The range defines if the crawl shall consider a complete domain, or a wide crawl up to a specific depth. diff --git a/htroot/CrawlStartSimple_p.java b/htroot/CrawlStartSimple_p.java index 41fec93b5..7926e7d27 100644 --- a/htroot/CrawlStartSimple_p.java +++ b/htroot/CrawlStartSimple_p.java @@ -45,7 +45,7 @@ public class CrawlStartSimple_p { // define visible variables prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); - prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0")); + prop.put("crawlingDepth", Math.min(3, env.getConfigLong("crawlingDepth", 0))); prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0")); int crawlingIfOlder = (int) env.getConfigLong("crawlingIfOlder", -1); diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 45e2c2f38..66f5f334f 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -97,10 +97,16 @@ public class WatchCrawler_p { prop.put("info", 3); } else { // set new properties + boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start + String newcrawlingfilter = post.get("crawlingFilter", ".*"); + if (fullDomain) try { + newcrawlingfilter = ".*" + (new URL(post.get("crawlingURL",""))).getHost() + ".*"; + } catch (MalformedURLException e) {} env.setConfig("crawlingFilter", newcrawlingfilter); int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0")); + if (fullDomain) newcrawlingdepth = 99; env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");