Lotus 13 years ago
parent 8d63a5887c
commit ee89cf5ae5

@ -482,16 +482,16 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
if (crawlingStartURL.isFile()) { if (crawlingStartURL.isFile()) {
return "file://" + crawlingStartURL.getPath() + ".*"; return "file://" + crawlingStartURL.getPath() + ".*";
} else if (crawlingStartURL.isSMB()) { } else if (crawlingStartURL.isSMB()) {
return "smb://" + crawlingStartURL.getHost() + ".*"; return "smb://" + crawlingStartURL.getHost() + "(?:/|$)+.*";
} else if (crawlingStartURL.isFTP()) { } else if (crawlingStartURL.isFTP()) {
return "ftp://" + crawlingStartURL.getHost() + ".*"; return "ftp://" + crawlingStartURL.getHost() + "(?:/|$)+.*";
} else { } else {
final String host = crawlingStartURL.getHost(); final String host = crawlingStartURL.getHost();
if (host.startsWith("www.")) { if (host.startsWith("www.")) {
return "https?://" + crawlingStartURL.getHost() + ".*"; return "https?://" + crawlingStartURL.getHost() + "(?:/|$)+.*";
} else { } else {
// if the www is not given we accept that also // if the www is not given we accept that also
return "https?://(www.)?" + crawlingStartURL.getHost() + ".*"; return "https?://(?:www.)?" + crawlingStartURL.getHost() + "(?:/|$)+.*";
} }
} }
} }

Loading…
Cancel
Save