diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index 7e92a52c7..09745b5ca 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -482,16 +482,16 @@ public class CrawlProfile extends ConcurrentHashMap implements M if (crawlingStartURL.isFile()) { return "file://" + crawlingStartURL.getPath() + ".*"; } else if (crawlingStartURL.isSMB()) { - return "smb://" + crawlingStartURL.getHost() + "(?:/|$)+.*"; + return "smb://" + crawlingStartURL.getHost() + ".*"; } else if (crawlingStartURL.isFTP()) { - return "ftp://" + crawlingStartURL.getHost() + "(?:/|$)+.*"; + return "ftp://" + crawlingStartURL.getHost() + ".*"; } else { final String host = crawlingStartURL.getHost(); if (host.startsWith("www.")) { - return "https?://" + crawlingStartURL.getHost() + "(?:/|$)+.*"; + return "https?://" + crawlingStartURL.getHost() + ".*"; } else { // if the www is not given we accept that also - return "https?://(?:www.)?" + crawlingStartURL.getHost() + "(?:/|$)+.*"; + return "https?://(?:www.)?" + crawlingStartURL.getHost() + ".*"; } } } diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index ea1515e1a..3c3c53cbc 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -414,13 +414,13 @@ public final class CrawlStacker { // filter with must-match for URLs if ((depth > 0) && !profile.urlMustMatchPattern().matcher(urlstring).matches()) { if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' does not match must-match crawling filter '" + profile.urlMustMatchPattern().toString() + "'."); - return "url does not match must-match filter"; + return "url does not match must-match filter " + profile.urlMustMatchPattern().toString(); } // filter with must-not-match for URLs if ((depth > 0) && profile.urlMustNotMatchPattern().matcher(urlstring).matches()) { if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.urlMustNotMatchPattern().toString() + "'."); - return "url matches must-not-match filter"; + return "url matches must-not-match filter " + profile.urlMustNotMatchPattern().toString(); } // deny cgi