added option to simple crawl start: complete domain crawl

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4070 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent d2360eaf68
commit 34858be5ef

@@ -50,10 +50,13 @@
 </td>
 </tr>
 <tr valign="top" class="TableCellLight">
-<td><label for="crawlingDepth">Crawling Depth</label>:</td>
-<td><input name="crawlingDepth" id="crawlingDepth" type="text" size="2" maxlength="2" value="#[crawlingDepth]#" /></td>
+<td><label for="crawlingDepth">Crawling Range</label>:</td>
 <td>
-The depth defines how deep the Crawler will follow links of links (...) and so on.
+<input type="radio" name="range" value="wide" checked="checked" />Wide: depth <input name="crawlingDepth" id="crawlingDepth" type="text" size="2" maxlength="2" value="#[crawlingDepth]#" />&nbsp;&nbsp;|&nbsp;&nbsp;
+<input type="radio" name="range" value="domain" />Complete Single Domain
+</td>
+<td>
+The range defines if the crawl shall consider a complete domain, or a wide crawl up to a specific depth.
 </td>
 </tr>

@@ -45,7 +45,7 @@ public class CrawlStartSimple_p {
 // define visible variables
 prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
-prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
+prop.put("crawlingDepth", Math.min(3, env.getConfigLong("crawlingDepth", 0)));
 prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
 int crawlingIfOlder = (int) env.getConfigLong("crawlingIfOlder", -1);
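This change only affects the value pre-filled into the depth field of the simple start page: the configured crawlingDepth is read as a long and capped at 3. A tiny illustration with a hypothetical configured value:

    long configured = 8;                      // hypothetical stored crawlingDepth
    long prefill = Math.min(3, configured);   // the form shows 3, not 8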

@@ -97,10 +97,16 @@ public class WatchCrawler_p {
 prop.put("info", 3);
 } else {
 // set new properties
+boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start
 String newcrawlingfilter = post.get("crawlingFilter", ".*");
+if (fullDomain) try {
+newcrawlingfilter = ".*" + (new URL(post.get("crawlingURL",""))).getHost() + ".*";
+} catch (MalformedURLException e) {}
 env.setConfig("crawlingFilter", newcrawlingfilter);
 int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0"));
+if (fullDomain) newcrawlingdepth = 99;
 env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
 boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
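Taken together, selecting "Complete Single Domain" rewrites the crawl filter to a host-bound regex and forces a large depth. A standalone, runnable sketch of that logic, assuming java.net.URL (the commit may use YaCy's own URL class) and a hypothetical start URL:

    import java.net.MalformedURLException;
    import java.net.URL;

    public class FullDomainCrawlSketch {
        public static void main(String[] args) {
            String crawlingURL = "http://www.example.org/index.html"; // hypothetical start URL
            boolean fullDomain = true;      // user selected "Complete Single Domain"

            String filter = ".*";           // default: no host restriction
            int depth = 3;                  // default: shallow "wide" crawl

            if (fullDomain) {
                try {
                    // restrict the filter to URLs containing the start host ...
                    filter = ".*" + new URL(crawlingURL).getHost() + ".*";
                    // ... and raise the depth so the whole domain is reachable
                    depth = 99;
                } catch (MalformedURLException e) {
                    // fall back to the defaults if the start URL cannot be parsed
                }
            }

            System.out.println(filter);     // .*www.example.org.*
            System.out.println(depth);      // 99
        }
    }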
