diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html
index d1b4efea0..474f79b58 100644
--- a/htroot/IndexCreate_p.html
+++ b/htroot/IndexCreate_p.html
@@ -45,9 +45,18 @@ You can define URLs as start points for Web page crawling and start crawling her
Re-Crawl Option:
-
-
+
+
+ Year(s)
+ Month(s)
+ Day(s)
+ Hour(s)
+ Minute(s)
+
+ If you use this option, web pages that already exist in your database are crawled and indexed again.
+ Whether this happens depends on the age of the last crawl: if the last crawl is older than the given
+ date, the page is crawled again; otherwise it is treated as a 'double' and not loaded or indexed again.
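As a reading aid for the text above, here is a minimal standalone sketch of that decision; names such as shouldRecrawl and lastCrawlDate are illustrative only, not the actual YaCy call sites:

import java.util.Date;

// Sketch of the re-crawl decision described above. An interval of -1 means
// the option is off, so a known URL is always treated as a 'double'.
public class RecrawlDecision {
    static boolean shouldRecrawl(Date lastCrawlDate, int recrawlIfOlderMinutes) {
        if (recrawlIfOlderMinutes < 0) return false; // option disabled
        long ageMinutes = (System.currentTimeMillis() - lastCrawlDate.getTime()) / 60000L;
        return ageMinutes > recrawlIfOlderMinutes;   // older than the interval -> crawl again
    }
}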
diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index 31eed73cf..23d9ad514 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -95,8 +95,11 @@ public class IndexCreate_p {
env.setConfig("crawlingFilter", newcrawlingfilter);
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "0"));
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
- int recrawlIfOlder = Integer.parseInt(post.get("crawlingIfOlder", "-1"));
- env.setConfig("crawlingIfOlder", recrawlIfOlder);
+ boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "").equals("on");
+ int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
+ String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year");
+ int crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
+ env.setConfig("crawlingIfOlder", crawlingIfOlder);
int domFilterDepth = Integer.parseInt(post.get("crawlingDomFilterDepth", "-1"));
env.setConfig("crawlingDomFilterDepth", Integer.toString(domFilterDepth));
int domMaxPages = Integer.parseInt(post.get("crawlingDomMaxPages", "-1"));
@@ -151,7 +154,7 @@ public class IndexCreate_p {
switchboard.urlPool.errorURL.remove(urlhash);
// stack url
- plasmaCrawlProfile.entry pe = switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, recrawlIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
+ plasmaCrawlProfile.entry pe = switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
String reasonString = switchboard.sbStackCrawlThread.stackCrawl(crawlingStart, null, yacyCore.seedDB.mySeed.hash, "CRAWLING-ROOT", new Date(), 0, pe);
if (reasonString == null) {
@@ -212,7 +215,7 @@ public class IndexCreate_p {
HashMap hyperlinks = (HashMap) scraper.getAnchors();
// creating a crawler profile
- plasmaCrawlProfile.entry profile = switchboard.profiles.newEntry(fileName, file.toURL().toString(), newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, recrawlIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
+ plasmaCrawlProfile.entry profile = switchboard.profiles.newEntry(fileName, file.toURL().toString(), newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingIfOlder, domFilterDepth, domMaxPages, crawlingQ, storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw);
// loop through the contained links
Iterator interator = hyperlinks.entrySet().iterator();
@@ -301,7 +304,32 @@ public class IndexCreate_p {
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0"));
prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0"));
- prop.put("crawlingIfOlder", env.getConfig("crawlingIfOlder", "-1"));
+
+ int crawlingIfOlder = (int) env.getConfigLong("crawlingIfOlder", -1);
+ prop.put("crawlingIfOlderCheck", (crawlingIfOlder == -1) ? 0 : 1);
+ prop.put("crawlingIfOlderUnitYearCheck", 0);
+ prop.put("crawlingIfOlderUnitMonthCheck", 0);
+ prop.put("crawlingIfOlderUnitDayCheck", 0);
+ prop.put("crawlingIfOlderUnitHourCheck", 0);
+ prop.put("crawlingIfOlderUnitMinuteCheck", 0);
+ if (crawlingIfOlder == -1) {
+ // option disabled: leave the number and unit fields unset
+ } else if (crawlingIfOlder >= 60*24*365) {
+ prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60*24*365));
+ prop.put("crawlingIfOlderUnitYearCheck", 1);
+ } else if (crawlingIfOlder >= 60*24*30) {
+ prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60*24*30));
+ prop.put("crawlingIfOlderUnitMonthCheck", 1);
+ } else if (crawlingIfOlder >= 60*24) {
+ prop.put("crawlingIfOlderNumber", crawlingIfOlder / (60*24));
+ prop.put("crawlingIfOlderUnitDayCheck", 1);
+ } else if (crawlingIfOlder >= 60) {
+ prop.put("crawlingIfOlderNumber", crawlingIfOlder / 60);
+ prop.put("crawlingIfOlderUnitHourCheck", 1);
+ } else {
+ prop.put("crawlingIfOlderNumber", crawlingIfOlder);
+ prop.put("crawlingIfOlderUnitMinuteCheck", 1);
+ }
prop.put("crawlingDomFilterDepth", env.getConfig("crawlingDomFilterDepth", "-1"));
prop.put("crawlingDomMaxPages", env.getConfig("crawlingDomMaxPages", "-1"));
prop.put("crawlingQChecked", env.getConfig("crawlingQ", "").equals("true") ? 1 : 0);
@@ -476,7 +504,16 @@ public class IndexCreate_p {
// return rewrite properties
return prop;
}
-
+
+ private static int recrawlIfOlderC(boolean recrawlIfOlderCheck, int recrawlIfOlderNumber, String crawlingIfOlderUnit) {
+ if (!recrawlIfOlderCheck) return -1; // option disabled
+ if (crawlingIfOlderUnit.equals("year")) return recrawlIfOlderNumber * 60 * 24 * 365;
+ if (crawlingIfOlderUnit.equals("month")) return recrawlIfOlderNumber * 60 * 24 * 30;
+ if (crawlingIfOlderUnit.equals("day")) return recrawlIfOlderNumber * 60 * 24;
+ if (crawlingIfOlderUnit.equals("hour")) return recrawlIfOlderNumber * 60;
+ if (crawlingIfOlderUnit.equals("minute")) return recrawlIfOlderNumber;
+ return -1; // unknown unit: treat as disabled
+ }
}
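The profile stores the interval as a plain number of minutes, so the form handling above converts in both directions: recrawlIfOlderC turns the number/unit pair into minutes, and the display code picks the largest unit that still fits. A standalone round-trip sketch (a hypothetical demo class, not part of the patch):

public class RecrawlUnitDemo {
    // mirrors recrawlIfOlderC: form input -> minutes
    static int toMinutes(int number, String unit) {
        if (unit.equals("year"))  return number * 60 * 24 * 365;
        if (unit.equals("month")) return number * 60 * 24 * 30;
        if (unit.equals("day"))   return number * 60 * 24;
        if (unit.equals("hour"))  return number * 60;
        return number; // "minute"
    }
    public static void main(String[] args) {
        int minutes = toMinutes(2, "month"); // 2 * 60 * 24 * 30 = 86400
        // mirrors the display code: pick the largest fitting unit
        // (note the parentheses; minutes / 60*24*30 would divide by 60 only)
        if (minutes >= 60*24*365)     System.out.println((minutes / (60*24*365)) + " year(s)");
        else if (minutes >= 60*24*30) System.out.println((minutes / (60*24*30)) + " month(s)");
        else if (minutes >= 60*24)    System.out.println((minutes / (60*24)) + " day(s)");
        else if (minutes >= 60)       System.out.println((minutes / 60) + " hour(s)");
        else                          System.out.println(minutes + " minute(s)");
        // prints: 2 month(s)
    }
}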
diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template
index f384a11be..2d0e025ce 100644
--- a/htroot/env/templates/header.template
+++ b/htroot/env/templates/header.template
@@ -37,13 +37,7 @@
-
-
-
-
-
-
@@ -65,6 +59,12 @@
+
+
+
+
+
+
diff --git a/source/yacy.java b/source/yacy.java
index cbbfd8544..8528f1550 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -138,7 +138,7 @@ public final class yacy {
private static float version = (float) 0.1;
private static final String vDATE = "@REPL_DATE@";
- private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
+ private static final String copyright = "[ YaCy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
private static final String hline = "-------------------------------------------------------------------------------";
/**
@@ -163,10 +163,10 @@ public final class yacy {
}
/**
- * Combines the version of the proxy with the versionnumber from SVN to a
+ * Combines the version of YaCy with the version number from SVN into a
* combined version
*
- * @param version Current given version for this proxy.
+ * @param version The current version.
* @param svn Current version given from svn.
* @return String with the combined version
*/