diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java
index 551d3805c..46bd0f54a 100644
--- a/htroot/WatchCrawler_p.java
+++ b/htroot/WatchCrawler_p.java
@@ -144,7 +144,7 @@ public class WatchCrawler_p {
             final boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
             final int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
             final String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year");
-            final int crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
+            final long crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
             env.setConfig("crawlingIfOlder", crawlingIfOlder);
 
             final boolean crawlingDomFilterCheck = post.get("crawlingDomFilterCheck", "off").equals("on");
@@ -394,13 +394,13 @@ public class WatchCrawler_p {
         return prop;
     }
 
-    private static int recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) {
-        if (!recrawlIfOlderCheck) return -1;
-        if (crawlingIfOlderUnit.equals("year")) return recrawlIfOlderNumber * 60 * 24 * 365;
-        if (crawlingIfOlderUnit.equals("month")) return recrawlIfOlderNumber * 60 * 24 * 30;
-        if (crawlingIfOlderUnit.equals("day")) return recrawlIfOlderNumber * 60 * 24;
-        if (crawlingIfOlderUnit.equals("hour")) return recrawlIfOlderNumber * 60;
-        return recrawlIfOlderNumber;
+    private static long recrawlIfOlderC(final boolean recrawlIfOlderCheck, final int recrawlIfOlderNumber, final String crawlingIfOlderUnit) {
+        if (!recrawlIfOlderCheck) return 0L;
+        if (crawlingIfOlderUnit.equals("year")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 365L;
+        if (crawlingIfOlderUnit.equals("month")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L * 30L;
+        if (crawlingIfOlderUnit.equals("day")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L * 24L;
+        if (crawlingIfOlderUnit.equals("hour")) return System.currentTimeMillis() - (long) recrawlIfOlderNumber * 1000L * 60L * 60L;
+        return System.currentTimeMillis() - (long) recrawlIfOlderNumber;
     }
 
     private static void setPerformance(final plasmaSwitchboard sb, final serverObjects post) {
diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java
index acfa31b34..69f82de97 100644
--- a/source/de/anomic/crawler/CrawlProfile.java
+++ b/source/de/anomic/crawler/CrawlProfile.java
@@ -146,7 +146,7 @@ public class CrawlProfile {
     public entry newEntry(final String name, final yacyURL startURL,
                           final String generalFilter, final String specificFilter,
                           final int generalDepth, final int specificDepth,
-                          final int recrawlIfOlder /*minutes*/, final int domFilterDepth, final int domMaxPages,
+                          final long recrawlIfOlder /*date*/, final int domFilterDepth, final int domMaxPages,
                           final boolean crawlingQ,
                           final boolean indexText, final boolean indexMedia,
                           final boolean storeHTCache, final boolean storeTXCache,
@@ -244,7 +244,7 @@ public class CrawlProfile {
         public entry(final String name, final yacyURL startURL,
                      final String generalFilter, final String specificFilter,
                      final int generalDepth, final int specificDepth,
-                     final int recrawlIfOlder /*minutes*/, final int domFilterDepth, final int domMaxPages,
+                     final long recrawlIfOlder /*date*/, final int domFilterDepth, final int domMaxPages,
                      final boolean crawlingQ,
                      final boolean indexText, final boolean indexMedia,
                      final boolean storeHTCache, final boolean storeTXCache,
@@ -260,7 +260,7 @@ public class CrawlProfile {
             mem.put(SPECIFIC_FILTER, (specificFilter == null) ? ".*" : specificFilter);
             mem.put(GENERAL_DEPTH, Integer.toString(generalDepth));
             mem.put(SPECIFIC_DEPTH, Integer.toString(specificDepth));
-            mem.put(RECRAWL_IF_OLDER, Integer.toString(recrawlIfOlder));
+            mem.put(RECRAWL_IF_OLDER, Long.toString(recrawlIfOlder));
             mem.put(DOM_FILTER_DEPTH, Integer.toString(domFilterDepth));
             mem.put(DOM_MAX_PAGES, Integer.toString(domMaxPages));
             mem.put(CRAWLING_Q, Boolean.toString(crawlingQ)); // crawling of urls with '?'
@@ -339,14 +339,14 @@ public class CrawlProfile {
         }
         public long recrawlIfOlder() {
             // returns a long (millis) that is the minimum age that
-            // an antry must have to be re-crawled
+            // an entry must have to be re-crawled
             final String r = mem.get(RECRAWL_IF_OLDER);
-            if (r == null) return Long.MAX_VALUE;
+            if (r == null) return 0L;
             try {
-                final long l = Long.parseLong(r) * 60000L;
-                return (l < 0) ? Long.MAX_VALUE : l;
+                final long l = Long.parseLong(r);
+                return (l < 0) ? 0L : l;
             } catch (final NumberFormatException e) {
-                return Long.MAX_VALUE;
+                return 0L;
             }
         }
         public int domFilterDepth() {
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 74ce2184e..b8903aa27 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -452,7 +452,7 @@ public final class CrawlStacker extends Thread {
         // check if the url is double registered
         final String dbocc = sb.crawlQueues.urlExists(entry.url().hash());
         final indexURLReference oldEntry = this.sb.webIndex.getURL(entry.url().hash(), null, 0);
-        final boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
+        final boolean recrawl = (oldEntry != null) && (profile.recrawlIfOlder() > oldEntry.loaddate().getTime());
         // do double-check
         if ((dbocc != null) && (!recrawl)) {
             reason = ErrorURL.DOUBLE_REGISTERED + dbocc + ")";
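
For reference, a minimal self-contained sketch of the new semantics (not part of the patch; class and method names here are illustrative, not YaCy API): the recrawl setting is no longer an age in minutes but an absolute timestamp threshold, with 0L meaning "never recrawl", and an entry is re-crawled when its load date lies before that threshold.

    // Illustrative sketch only; mirrors the behavior introduced by this diff.
    public final class RecrawlThresholdSketch {

        // Like the patched WatchCrawler_p.recrawlIfOlderC: turn a user-supplied
        // age (e.g. "7 days") into an absolute timestamp; 0L means "never recrawl".
        static long recrawlThreshold(final boolean enabled, final int number, final String unit) {
            if (!enabled) return 0L;
            final long millisPerUnit;
            if (unit.equals("year"))       millisPerUnit = 1000L * 60L * 60L * 24L * 365L;
            else if (unit.equals("month")) millisPerUnit = 1000L * 60L * 60L * 24L * 30L;
            else if (unit.equals("day"))   millisPerUnit = 1000L * 60L * 60L * 24L;
            else if (unit.equals("hour"))  millisPerUnit = 1000L * 60L * 60L;
            else                           millisPerUnit = 1L; // fallback: number is taken as millis
            return System.currentTimeMillis() - (long) number * millisPerUnit;
        }

        // Like the patched CrawlStacker check: re-crawl when the stored entry
        // was loaded before the threshold date.
        static boolean shouldRecrawl(final long loadDateMillis, final long threshold) {
            return threshold > loadDateMillis;
        }

        public static void main(final String[] args) {
            final long threshold = recrawlThreshold(true, 7, "day");
            final long loadedTenDaysAgo = System.currentTimeMillis() - 10L * 24L * 60L * 60L * 1000L;
            final long loadedYesterday  = System.currentTimeMillis() -  1L * 24L * 60L * 60L * 1000L;
            System.out.println(shouldRecrawl(loadedTenDaysAgo, threshold)); // true  -> re-crawl
            System.out.println(shouldRecrawl(loadedYesterday,  threshold)); // false -> still fresh
        }
    }

Because the comparison direction flips from "age since load > minimum age" to "threshold date > load date", the sentinel for the disabled case also flips: 0L (recrawl nothing) replaces Long.MAX_VALUE, which under the new comparison would have meant "recrawl everything".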