diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html
index 78af86373..8094076c3 100644
--- a/htroot/CrawlStartExpert.html
+++ b/htroot/CrawlStartExpert.html
@@ -381,6 +381,16 @@
 			</fieldset>
 			<fieldset>
 				<legend>Clean-Up before Crawl Start</legend>
+				<dl>
+					<dt>
+						<input type="checkbox" name="cleanSearchCache" id="cleanSearchCache" #(cleanSearchCacheChecked)#::checked="checked"#(/cleanSearchCacheChecked)# />
+					</dt>
+					<dd><label for="cleanSearchCache">Clean up search events cache</label> <img src="env/grafics/i16.gif" width="16" height="16" alt="info"/>
+						<span style="right:0px;">
+							Check this option to be sure to get fresh search results including newly crawled documents. Beware that it will also interrupt any refreshing/resorting of search results currently requested from browser-side.
+						</span>
+					</dd>
+				</dl>
 				<dl>
 					<dt>No Deletion</dt>
 					<dd><img src="env/grafics/i16.gif" width="16" height="16" alt="info"/><span style="right:0px;">After a crawl was done in the past, document may become stale and eventually they are also deleted on the target host.
diff --git a/htroot/CrawlStartExpert.java b/htroot/CrawlStartExpert.java
index d648c5a0b..062da6b35 100644
--- a/htroot/CrawlStartExpert.java
+++ b/htroot/CrawlStartExpert.java
@@ -35,6 +35,7 @@ import net.yacy.cora.util.Html2Image;
 import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.document.LibraryProvider;
 import net.yacy.search.Switchboard;
+import net.yacy.search.SwitchboardConstants;
 import net.yacy.search.schema.CollectionSchema;
 import net.yacy.server.serverObjects;
 import net.yacy.server.serverSwitch;
@@ -364,6 +365,19 @@ public class CrawlStartExpert {
         } else {
             prop.put("deleteIfOlderUnitSelect_list_2_default", 1);
         }
+
+
+        // clean up search events cache ?
+        if (post != null && post.containsKey("cleanSearchCache")) {
+            prop.put("cleanSearchCacheChecked", post.getBoolean("cleanSearchCache"));
+        } else {
+            /*
+             * no parameter passed : the checkbox is proposed unchecked
+             * when JavaScript search resort is enabled, as it heavily relies on search events cache
+             */
+            prop.put("cleanSearchCacheChecked", !sb.getConfigBool(SwitchboardConstants.SEARCH_JS_RESORT,
+                    SwitchboardConstants.SEARCH_JS_RESORT_DEFAULT));
+        }
 
         // delete any document before the crawl is started?
         if (post != null && post.containsKey("deleteold")) {
diff --git a/htroot/CrawlStartSite.html b/htroot/CrawlStartSite.html
index d8191be6f..c834a4381 100644
--- a/htroot/CrawlStartSite.html
+++ b/htroot/CrawlStartSite.html
@@ -86,6 +86,7 @@
+	<input type="hidden" name="cleanSearchCache" value="true" />
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index e51707e3b..143add3dc 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -74,20 +74,18 @@ import net.yacy.search.schema.CollectionSchema;
 import net.yacy.server.serverObjects;
 import net.yacy.server.serverSwitch;
 
+/**
+ * This servlet does NOT create the Crawler servlet page content! This controls
+ * a web crawl start or the crawl monitor page (Crawler_p.html). The interfaces for entering the web crawl parameters are
+ * in CrawlStartSite.html and CrawlStartExpert.html.
+ */
 public class Crawler_p {
-    // this servlet does NOT create the Crawler servlet page content!
-    // this servlet starts a web crawl. The interface for entering the web crawl parameters is in IndexCreate_p.html
-
     public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
         // return variable that accumulates replacements
         final Switchboard sb = (Switchboard) env;
-        // clean up all search events
-        SearchEventCache.cleanupEvents(true);
-        sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
-
         // inital values for AJAX Elements (without JavaScript)
         final serverObjects prop = new serverObjects();
         prop.put("rejected", 0);
@@ -220,6 +218,12 @@ public class Crawler_p {
         if (sb.peers == null) {
             prop.put("info", "3");
         } else {
+
+            if(post.getBoolean("cleanSearchCache")) {
+                // clean up all search events
+                SearchEventCache.cleanupEvents(true);
+                sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
+            }
 
             // remove crawlingFileContent before we record the call
             String crawlingFileName = post.get("crawlingFile");
diff --git a/htroot/Load_MediawikiWiki.html b/htroot/Load_MediawikiWiki.html
index 00b0a8a63..36442fa4d 100644
--- a/htroot/Load_MediawikiWiki.html
+++ b/htroot/Load_MediawikiWiki.html
@@ -34,6 +34,7 @@
+	<input type="hidden" name="cleanSearchCache" value="#(cleanSearchCacheChecked)#false::true#(/cleanSearchCacheChecked)#" />
diff --git a/htroot/Load_MediawikiWiki.java b/htroot/Load_MediawikiWiki.java
index 7474bff83..59b8a4ed2 100644
--- a/htroot/Load_MediawikiWiki.java
+++ b/htroot/Load_MediawikiWiki.java
@@ -26,6 +26,7 @@ import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.search.Switchboard;
+import net.yacy.search.SwitchboardConstants;
 import net.yacy.server.serverObjects;
 import net.yacy.server.serverSwitch;
@@ -45,6 +46,18 @@ public class Load_MediawikiWiki {
         }
         prop.put("starturl", "http://");
         prop.put("address", a);
+
+        // hidden form param : clean up search events cache ?
+        if (post != null && post.containsKey("cleanSearchCache")) {
+            prop.put("cleanSearchCacheChecked", post.getBoolean("cleanSearchCache"));
+        } else {
+            /*
+             * no parameter passed : no search event cache clean-up
+             * when JavaScript search resort is enabled, as it heavily relies on search events cache
+             */
+            prop.put("cleanSearchCacheChecked", !sb.getConfigBool(SwitchboardConstants.SEARCH_JS_RESORT,
+                    SwitchboardConstants.SEARCH_JS_RESORT_DEFAULT));
+        }
 
         // return rewrite properties
         return prop;
diff --git a/htroot/Load_PHPBB3.html b/htroot/Load_PHPBB3.html
index cf61e5878..c49583cba 100644
--- a/htroot/Load_PHPBB3.html
+++ b/htroot/Load_PHPBB3.html
@@ -45,6 +45,7 @@
+	<input type="hidden" name="cleanSearchCache" value="#(cleanSearchCacheChecked)#false::true#(/cleanSearchCacheChecked)#" />
diff --git a/htroot/Load_PHPBB3.java b/htroot/Load_PHPBB3.java
index 4d9b565bf..a3c8643c3 100644
--- a/htroot/Load_PHPBB3.java
+++ b/htroot/Load_PHPBB3.java
@@ -44,6 +44,18 @@ public class Load_PHPBB3 {
         final String repository = "http://" + a + "/";
         prop.put("starturl", (intranet) ? repository : "http://");
         prop.put("address", a);
+
+        // hidden form param : clean up search events cache ?
+        if (post != null && post.containsKey("cleanSearchCache")) {
+            prop.put("cleanSearchCacheChecked", post.getBoolean("cleanSearchCache"));
+        } else {
+            /*
+             * no parameter passed : no search event cache clean-up
+             * when JavaScript search resort is enabled, as it heavily relies on search events cache
+             */
+            prop.put("cleanSearchCacheChecked", !sb.getConfigBool(SwitchboardConstants.SEARCH_JS_RESORT,
+                    SwitchboardConstants.SEARCH_JS_RESORT_DEFAULT));
+        }
 
         // return rewrite properties
         return prop;
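
The same three-branch default rule appears above in CrawlStartExpert.java, Load_MediawikiWiki.java and Load_PHPBB3.java. A minimal sketch of that rule factored into a single helper, assuming getConfigBool is available at the serverSwitch level as the calls in this patch suggest; the class and method names are illustrative only, not part of the patch:

import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverSwitch;

/**
 * Sketch only (not part of the patch): the shared default for the
 * cleanSearchCacheChecked template property when no cleanSearchCache
 * parameter was posted.
 */
public final class CleanSearchCacheDefaults {

    private CleanSearchCacheDefaults() {
    }

    /**
     * The checkbox is pre-checked only while JavaScript result resorting is
     * disabled: cleaning the search events cache would interrupt any
     * browser-side refreshing/resorting that relies on it.
     */
    public static boolean proposeCleanSearchCache(final serverSwitch sb) {
        return !sb.getConfigBool(SwitchboardConstants.SEARCH_JS_RESORT,
                SwitchboardConstants.SEARCH_JS_RESORT_DEFAULT);
    }
}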
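
A usage sketch for the new parameter: starting a crawl over HTTP with the cache clean-up enabled. Only cleanSearchCache itself is introduced by this patch; the peer address and port, the admin authentication that protected *_p pages normally require, and the companion crawl parameters are assumptions for illustration.

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

/** Sketch only: POST a crawl start to Crawler_p.html with cleanSearchCache=true. */
public class CrawlStartWithCacheCleanup {

    public static void main(final String[] args) throws Exception {
        // cleanSearchCache=true asks Crawler_p to flush the search events cache
        // (SearchEventCache.cleanupEvents) before the crawl starts; omitting it
        // keeps the cache and any browser-side resorting alive.
        final String form = "crawlingstart=1"
                + "&crawlingMode=url"
                + "&crawlingURL=" + URLEncoder.encode("http://example.net/", StandardCharsets.UTF_8)
                + "&cleanSearchCache=true";

        final HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8090/Crawler_p.html")) // assumed local peer
                .header("Content-Type", "application/x-www-form-urlencoded")
                .POST(HttpRequest.BodyPublishers.ofString(form))
                .build();

        final HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println("HTTP " + response.statusCode());
    }
}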