- added dynamic filters to autoReCrawl.conf

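- Restrict to sub-path: set the crawl filter to the keyword "sub" (expanded to the bookmark URL up to and including its last "/", followed by ".*")
- Restrict to start-domain: set the crawl filter to the keyword "dom" (expanded to ".*" + host of the bookmark URL + ".*")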

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5070 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 16 years ago
parent 8551e1d106
commit bd931a82f7

@ -880,3 +880,13 @@ routing.deleteOldSeeds.permission__pro = true
routing.deleteOldSeeds.time = 7
routing.deleteOldSeeds.time__pro = 30
# autoReCrawl Options
autoReCrawl_idlesleep = 3600000
autoReCrawl_busysleep = 3600000
autoReCrawl_memprereq = -1

@ -222,16 +222,27 @@ public class bookmarksDB {
serverLog.logInfo("BOOKMARKS", "autoReCrawl - checking schedule for: "+"["+serverDate.formatISO8601(date)+"] "+bm.getUrl());
if (interTime >= 0 && interTime < sleepTime) {
try {
// check if the crawl filter works correctly
Pattern.compile(newcrawlingfilter);
// set crawlingStart to BookmarkUrl
String crawlingStart = bm.getUrl();
// stack request
// first delete old entry, if exists
try {
int pos = 0;
// set crawlingStart to BookmarkUrl
String crawlingStart = bm.getUrl();
yacyURL crawlingStartURL = new yacyURL(crawlingStart, null);
// set the crawling filter
if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
if (crawlingStartURL!= null && newcrawlingfilter.equals("dom")) {
newcrawlingfilter = ".*" + crawlingStartURL.getHost() + ".*";
}
if (crawlingStart!= null && newcrawlingfilter.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) {
newcrawlingfilter = crawlingStart.substring(0, pos + 1) + ".*";
}
sb.setConfig("crawlingFilter", newcrawlingfilter);
// check if the crawl filter works correctly
Pattern.compile(newcrawlingfilter);
String urlhash = crawlingStartURL.hash();
sb.webIndex.removeURL(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
@ -250,6 +261,7 @@ public class bookmarksDB {
if (reasonString == null) {
serverLog.logInfo("BOOKMARKS", "autoReCrawl - adding crawl profile for: " + crawlingStart);
serverLog.logInfo("BOOKMARKS", "autoReCrawl - crawl filter is set to: " + newcrawlingfilter);
// generate a YaCyNews if the global flag was set
if (crawlOrder) {
Map<String, String> m = new HashMap<String, String>(pe.map()); // must be cloned

Loading…
Cancel
Save