diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 6663fe06f..740862cf7 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -174,7 +174,13 @@ public class Crawler_p {
         } else if (crawlingStartURL.isFTP()) {
             newcrawlingMustMatch = "ftp://" + crawlingStartURL.getHost();
         } else {
-            newcrawlingMustMatch = "https?://" + crawlingStartURL.getHost();
+            final String host = crawlingStartURL.getHost();
+            if (host.startsWith("www.")) {
+                newcrawlingMustMatch = "https?://" + host;
+            } else {
+                // if the www. prefix is not given, accept that variant as well
+                newcrawlingMustMatch = "https?://(www\\.)?" + host;
+            }
         }
         if (subPath) newcrawlingMustMatch += crawlingStartURL.getPath();
         newcrawlingMustMatch += ".*";
@@ -374,7 +380,7 @@ public class Crawler_p {
                 String tagStr = tags.toString();
                 if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]"))
                     tagStr = tagStr.substring(1, tagStr.length() - 2);
-                // we will create always a bookmark to use this to track crawled hosts
+                // we always create a bookmark, which we use to track crawled hosts
                 final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(crawlingStart, "admin");
                 if (bookmark != null) {
                     bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_TITLE, title);
@@ -384,11 +390,11 @@ public class Crawler_p {
                     bookmark.setTags(tags, true);
                     sb.bookmarksDB.saveBookmark(bookmark);
                 }
-
+
                 // do the same for ymarks
                 // TODO: could a non admin user add crawls?
                 sb.tables.bookmarks.createBookmark(sb.loader, url, YMarkTables.USER_ADMIN, true, "crawlStart", "/Crawl Start");
-
+
                 // liftoff!
                 prop.put("info", "8");//start msg
                 prop.putHTML("info_crawlingURL", post.get("crawlingURL"));
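
For reference, a minimal standalone sketch (not part of the patch) of how the widened must-match filter behaves once the else-branch above is applied. The class name, the host "example.com", and the sample URLs are hypothetical; the pattern string mirrors the one the patched branch builds for a host given without the www. prefix.

import java.util.regex.Pattern;

public class MustMatchSketch {
    public static void main(String[] args) {
        // pattern as built by the patched else-branch for host "example.com", plus the ".*" suffix
        final Pattern mustMatch = Pattern.compile("https?://(www\\.)?example.com.*");
        System.out.println(mustMatch.matcher("http://example.com/page").matches());       // true
        System.out.println(mustMatch.matcher("https://www.example.com/page").matches());  // true
        System.out.println(mustMatch.matcher("https://blog.example.com/").matches());     // false
    }
}

Anchoring the optional (www\.)? group directly after the scheme means only the bare host and its www. variant are accepted; other subdomains such as blog.example.com stay excluded, preserving the intent of the original single-host filter.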