diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html
index d1b4efea0..474f79b58 100644
--- a/htroot/IndexCreate_p.html
+++ b/htroot/IndexCreate_p.html
@@ -45,9 +45,18 @@ You can define URLs as start points for Web page crawling and start crawling her
 Re-Crawl Option:
-
-
+
+
+ Year(s)
+ Month(s)
+ Day(s)
+ Hour(s)
+ Minute(s)
+
+ If you use this option, web pages that already exist in your database are crawled and indexed again.
+ Whether this happens depends on the age of the last crawl: if the last crawl is older than the given
+ age, the page is crawled again; otherwise it is treated as 'double' and not loaded or indexed again.
diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index 31eed73cf..23d9ad514 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -95,8 +95,11 @@ public class IndexCreate_p {
         env.setConfig("crawlingFilter", newcrawlingfilter);
         int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0"));
         env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
-        int recrawlIfOlder = Integer.parseInt(post.get("crawlingIfOlder", "-1"));
-        env.setConfig("crawlingIfOlder", recrawlIfOlder);
+        boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "").equals("on");
+        int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
+        String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit", "year");
+        int crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
+        env.setConfig("crawlingIfOlder", crawlingIfOlder);
         int domFilterDepth = Integer.parseInt(post.get("crawlingDomFilterDepth", "-1"));
         env.setConfig("crawlingDomFilterDepth", Integer.toString(domFilterDepth));
         int domMaxPages = Integer.parseInt(post.get("crawlingDomMaxPages", "-1"));
@@ -151,7 +154,7 @@ public class IndexCreate_p {
             switchboard.urlPool.errorURL.remove(urlhash);

             // stack url
-            plasmaCrawlProfile.entry pe = switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, recrawlIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
+            plasmaCrawlProfile.entry pe = switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
             String reasonString = switchboard.sbStackCrawlThread.stackCrawl(crawlingStart, null, yacyCore.seedDB.mySeed.hash, "CRAWLING-ROOT", new Date(), 0, pe);

             if (reasonString == null) {
@@ -212,7 +215,7 @@ public class IndexCreate_p {
                 HashMap hyperlinks = (HashMap) scraper.getAnchors();

                 // creating a crawler profile
-                plasmaCrawlProfile.entry profile = switchboard.profiles.newEntry(fileName, file.toURL().toString(), newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, recrawlIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
+                plasmaCrawlProfile.entry profile = switchboard.profiles.newEntry(fileName, file.toURL().toString(), newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);

                 // loop through the contained links
                 Iterator interator = hyperlinks.entrySet().iterator();
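Aside: the new handler reduces the checkbox, number field, and unit radio buttons to a single age threshold in minutes via recrawlIfOlderC, which is defined near the end of this patch. A minimal standalone sketch of that conversion; the class and method names here are illustrative, not part of the patch:

```java
// Sketch: collapse the re-crawl form fields (checkbox, number, unit)
// into one age threshold in minutes, mirroring recrawlIfOlderC below.
public class RecrawlParseDemo {
    static int toMinutes(boolean enabled, int number, String unit) {
        if (!enabled) return -1;                              // checkbox off: option disabled
        if (unit.equals("year"))   return number * 60 * 24 * 365;
        if (unit.equals("month"))  return number * 60 * 24 * 30;
        if (unit.equals("day"))    return number * 60 * 24;
        if (unit.equals("hour"))   return number * 60;
        if (unit.equals("minute")) return number;
        return -1;                                            // unknown unit: treat as disabled
    }

    public static void main(String[] args) {
        System.out.println(toMinutes(true, 2, "day"));   // 2880
        System.out.println(toMinutes(true, 1, "year"));  // 525600
        System.out.println(toMinutes(false, 7, "hour")); // -1
    }
}
```

Even a generous input stays well inside int range: 100 years is 52,560,000 minutes.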
@@ -301,7 +304,32 @@ public class IndexCreate_p {
         prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
         prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
         prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
-        prop.put("crawlingIfOlder", env.getConfig("crawlingIfOlder", "-1"));
+
+        int crawlingIfOlder = (int) env.getConfigLong("crawlingIfOlder", -1);
+        prop.put("crawlingIfOlderCheck", (crawlingIfOlder == Integer.MAX_VALUE) ? 0 : 1);
+        prop.put("crawlingIfOlderUnitYearCheck", 0);
+        prop.put("crawlingIfOlderUnitMonthCheck", 0);
+        prop.put("crawlingIfOlderUnitDayCheck", 0);
+        prop.put("crawlingIfOlderUnitHourCheck", 0);
+        prop.put("crawlingIfOlderUnitMinuteCheck", 0);
+        if (crawlingIfOlder == Integer.MAX_VALUE) {
+        } else if (crawlingIfOlder >= 60 * 24 * 365) {
+            prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60 * 24 * 365));
+            prop.put("crawlingIfOlderUnitYearCheck", 1);
+        } else if (crawlingIfOlder >= 60 * 24 * 30) {
+            prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60 * 24 * 30));
+            prop.put("crawlingIfOlderUnitMonthCheck", 1);
+        } else if (crawlingIfOlder >= 60 * 24) {
+            prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60 * 24));
+            prop.put("crawlingIfOlderUnitDayCheck", 1);
+        } else if (crawlingIfOlder >= 60) {
+            prop.put("crawlingIfOlderNumber", crawlingIfOlder / 60);
+            prop.put("crawlingIfOlderUnitHourCheck", 1);
+        } else {
+            prop.put("crawlingIfOlderNumber", crawlingIfOlder);
+            prop.put("crawlingIfOlderUnitMinuteCheck", 1);
+        }
+        //prop.put("crawlingIfOlder", crawlingIfOlder);
         prop.put("crawlingDomFilterDepth", env.getConfig("crawlingDomFilterDepth", "-1"));
         prop.put("crawlingDomMaxPages", env.getConfig("crawlingDomMaxPages", "-1"));
         prop.put("crawlingQChecked", env.getConfig("crawlingQ", "").equals("true") ? 1 : 0);
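Note that the parsing code above stores -1 when the option is disabled, while this display code tests Integer.MAX_VALUE for the same state. A sketch of the reverse decomposition that treats both markers as disabled; the fromMinutes name is mine, not YaCy API:

```java
// Sketch: decompose a minute threshold into the largest unit that fits,
// mirroring the prop.put() cascade above.
public class RecrawlDisplayDemo {
    static String fromMinutes(int minutes) {
        if (minutes < 0 || minutes == Integer.MAX_VALUE) return "disabled";
        if (minutes >= 60 * 24 * 365) return (minutes / (60 * 24 * 365)) + " year(s)";
        if (minutes >= 60 * 24 * 30)  return (minutes / (60 * 24 * 30))  + " month(s)";
        if (minutes >= 60 * 24)       return (minutes / (60 * 24))       + " day(s)";
        if (minutes >= 60)            return (minutes / 60)              + " hour(s)";
        return minutes + " minute(s)";
    }

    public static void main(String[] args) {
        System.out.println(fromMinutes(2880));              // 2 day(s)
        System.out.println(fromMinutes(90));                // 1 hour(s): integer division rounds down
        System.out.println(fromMinutes(-1));                // disabled
        System.out.println(fromMinutes(Integer.MAX_VALUE)); // disabled
    }
}
```

The decomposition is lossy: 90 minutes redisplays as "1 hour(s)", which is tolerable for a settings form, but the stored minute value, not the redisplayed one, should be treated as authoritative.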
@@ -476,7 +504,16 @@ public class IndexCreate_p {
         // return rewrite properties
         return prop;
     }
-
+
+    private static int recrawlIfOlderC(boolean recrawlIfOlderCheck, int recrawlIfOlderNumber, String crawlingIfOlderUnit) {
+        if (!recrawlIfOlderCheck) return -1;
+        if (crawlingIfOlderUnit.equals("year")) return recrawlIfOlderNumber * 60 * 24 * 365;
+        if (crawlingIfOlderUnit.equals("month")) return recrawlIfOlderNumber * 60 * 24 * 30;
+        if (crawlingIfOlderUnit.equals("day")) return recrawlIfOlderNumber * 60 * 24;
+        if (crawlingIfOlderUnit.equals("hour")) return recrawlIfOlderNumber * 60;
+        if (crawlingIfOlderUnit.equals("minute")) return recrawlIfOlderNumber;
+        return -1;
+    }
 }
diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template
index f384a11be..2d0e025ce 100644
--- a/htroot/env/templates/header.template
+++ b/htroot/env/templates/header.template
@@ -37,13 +37,7 @@
  Index Create
  Index Control
  Index Monitor
-
-
- Local Proxy
  Blacklist
-  Proxy Indexing
-  Cache Monitor
-  Cookie Monitor

 Communication / Publication
@@ -65,6 +59,12 @@
  Connections
+ Local Proxy
+  Proxy Indexing
+  Cache Monitor
+  Cookie Monitor
+
+
 The Project
  Project Home
  Project News
diff --git a/source/yacy.java b/source/yacy.java
index cbbfd8544..8528f1550 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -138,7 +138,7 @@ public final class yacy {
     private static float version = (float) 0.1;
     private static final String vDATE = "@REPL_DATE@";
-    private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
+    private static final String copyright = "[ YaCy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
     private static final String hline = "-------------------------------------------------------------------------------";

     /**
@@ -163,10 +163,10 @@ public final class yacy {
     }

     /**
-     * Combines the version of the proxy with the versionnumber from SVN to a
+     * Combines the version of YaCy with the version number from SVN into a
      * combined version
      *
-     * @param version Current given version for this proxy.
+     * @param version Current given version.
      * @param svn Current version given from svn.
      * @return String with the combined version
      */
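How the crawler consumes the stored threshold is not part of this patch, but the HTML help text above describes the intended check: re-crawl if the last crawl is older than the given age, otherwise treat the URL as a 'double'. A hedged sketch of such a gate, with illustrative names throughout, since plasmaCrawlProfile's actual use of "crawlingIfOlder" is not shown here:

```java
import java.util.Date;

// Sketch: gate a re-crawl decision on a minute-based age threshold.
// This is an assumption about the intended semantics, not YaCy code.
public class RecrawlGateDemo {
    static boolean shouldRecrawl(Date lastCrawl, int thresholdMinutes) {
        if (thresholdMinutes < 0) return false; // option disabled: known URLs stay 'double'
        long ageMinutes = (System.currentTimeMillis() - lastCrawl.getTime()) / 60000L;
        return ageMinutes > thresholdMinutes;
    }

    public static void main(String[] args) {
        Date threeDaysAgo = new Date(System.currentTimeMillis() - 3L * 24 * 60 * 60 * 1000);
        System.out.println(shouldRecrawl(threeDaysAgo, 2 * 60 * 24)); // true: last crawl older than 2 days
        System.out.println(shouldRecrawl(threeDaysAgo, 7 * 60 * 24)); // false: not older than 7 days
    }
}
```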