From 84763126e0c87cf67f817220c30dca16d6b75310 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 24 Nov 2014 20:28:52 +0100 Subject: [PATCH] added option to make the YaCy proxy act as if the cache is never stale. If set to 'Always Fresh' the cache is always used if the entry in the cache exists. This is a good way to archive web content and access it without going online again in case the documents exist. To do so, open /Settings_p.html?page=ProxyAccess and check the "Always Fresh" checkbox. This is set to false by default, which behaves as before. If you set this to true, then you have your web archive in DATA/HTCACHE. Copy this to carry around your private copy of the internet! --- defaults/yacy.init | 3 +++ htroot/SettingsAck_p.html | 4 ++++ htroot/SettingsAck_p.java | 7 ++++++- htroot/Settings_ProxyAccess.inc | 8 ++++++++ htroot/Settings_p.java | 1 + source/net/yacy/crawler/retrieval/Response.java | 3 +++ 6 files changed, 25 insertions(+), 1 deletion(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 37911223e..31018bcc0 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -244,6 +244,9 @@ proxyCache = DATA/HTCACHE # default: 4 Gigabyte proxyCacheSize = 4096 +# you can use the proxy with fresh/stale rules or in a always-fresh mode +proxyAlwaysFresh = false + # a path to the surrogate input directory surrogates.in = DATA/SURROGATES/in diff --git a/htroot/SettingsAck_p.html b/htroot/SettingsAck_p.html index 94741d49c..4530a40a4 100644 --- a/htroot/SettingsAck_p.html +++ b/htroot/SettingsAck_p.html @@ -103,6 +103,10 @@ Transparent Proxy Support is: #[isTransparentProxy]# + + Always Fresh is: + #[proxyAlwaysFresh]# + Send via header is: #[proxy.sendViaHeader]# diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java index 25c41fc6b..d8f5c27ff 100644 --- a/htroot/SettingsAck_p.java +++ b/htroot/SettingsAck_p.java @@ -154,7 +154,12 @@ public class SettingsAck_p { boolean isTransparentProxy = post.containsKey("isTransparentProxy"); 
env.setConfig("isTransparentProxy", isTransparentProxy); prop.put("info_isTransparentProxy", isTransparentProxy ? "on" : "off"); - + + // set proxyAlwaysFresh flag + boolean proxyAlwaysFresh = post.containsKey("proxyAlwaysFresh"); + env.setConfig("proxyAlwaysFresh", proxyAlwaysFresh); + prop.put("info_proxyAlwaysFresh", proxyAlwaysFresh ? "on" : "off"); + // setting via header property env.setConfig("proxy.sendViaHeader", post.containsKey("proxy.sendViaHeader")); prop.put("info_proxy.sendViaHeader", post.containsKey("proxy.sendViaHeader")? "on" : "off"); diff --git a/htroot/Settings_ProxyAccess.inc b/htroot/Settings_ProxyAccess.inc index 6bfbbd0eb..373e97fbf 100644 --- a/htroot/Settings_ProxyAccess.inc +++ b/htroot/Settings_ProxyAccess.inc @@ -10,6 +10,14 @@ iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 --dport 80 -j DNAT --to 192.168.0.1:#[port]# + + : + + + If unchecked, the proxy will act using Cache Fresh / Cache Stale rules. If checked, the cache is always fresh which means + that a page is never loaded again if it was already stored in the cache. However, if the page does not exist in the cache, it will be loaded in any case. + + : diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index 1386e6ee8..af1a7c0ac 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -84,6 +84,7 @@ public final class Settings_p { // http networking settings prop.put("isTransparentProxy", env.getConfigBool("isTransparentProxy", false) ? "1" : "0"); + prop.put("proxyAlwaysFresh", env.getConfigBool("proxyAlwaysFresh", false) ? "1" : "0"); prop.put("proxy.sendViaHeader", env.getConfigBool("proxy.sendViaHeader", false) ? "1" : "0"); prop.put("proxy.sendXForwardedForHeader", env.getConfigBool("proxy.sendXForwardedForHeader", true) ? 
"1" : "0"); diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index a0cca651b..0680a1fed 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -44,6 +44,7 @@ import net.yacy.crawler.data.ResultURLs.EventOrigin; import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.TextParser; +import net.yacy.search.Switchboard; public class Response { @@ -419,6 +420,8 @@ public class Response { */ public boolean isFreshForProxy() { + if (Switchboard.getSwitchboard().getConfigBool("proxyAlwaysFresh", false)) return true; + // -CGI access in request // CGI access makes the page very individual, and therefore not usable // in caches