From e80dfeca23f838cf9ffcc226866c25f4f9b9d219 Mon Sep 17 00:00:00 2001 From: reger Date: Sat, 8 Dec 2012 06:34:48 +0100 Subject: [PATCH] - making blacklist path part case insensitive (solving http://bugs.yacy.net/view.php?id=171) - blacklist test adding explicite response text "not blocked" if no blacklist match --- htroot/BlacklistTest_p.html | 84 ++++----- htroot/BlacklistTest_p.java | 210 ++++++++++++---------- source/net/yacy/repository/Blacklist.java | 6 +- 3 files changed, 159 insertions(+), 141 deletions(-) diff --git a/htroot/BlacklistTest_p.html b/htroot/BlacklistTest_p.html index ad1025e89..61f15aba2 100644 --- a/htroot/BlacklistTest_p.html +++ b/htroot/BlacklistTest_p.html @@ -1,42 +1,42 @@ - - - - YaCy '#[clientname]#': Blacklist Test - #%env/templates/metas.template%# - - - #%env/templates/header.template%# - #%env/templates/submenuBlacklist.template%# - -

Blacklist Test

-

Used Blacklist engine: #[blacklistEngine]#

- -
-
- Test list: - - - - #(testlist)#:: -

-
- The tested URL was #[url]#
- It is blocked for the following cases:
-
    - #(listedincrawler)#::
  • Crawling
  • #(/listedincrawler)# - #(listedindht)#::
  • DHT
  • #(/listedindht)# - #(listedinnews)#::
  • News
  • #(/listedinnews)# - #(listedinproxy)#::
  • Proxy
  • #(/listedinproxy)# - #(listedinsearch)#::
  • Search
  • #(/listedinsearch)# - #(listedinsurftips)#::
  • Surftips
  • #(/listedinsurftips)# -
-
- :: -

-
The tested URL was not valid.
- #(/testlist)# -
-
- #%env/templates/footer.template%# - - + + + + YaCy '#[clientname]#': Blacklist Test + #%env/templates/metas.template%# + + + #%env/templates/header.template%# + #%env/templates/submenuBlacklist.template%# + +

Blacklist Test

+

Used Blacklist engine: #[blacklistEngine]#

+ +
+
+ Test list: + + + + #(testlist)#:: +

+
+ The tested URL was #[url]#
+ #(isnotblocked)#It is blocked for the following cases:
::
is not blocked
#(/isnotblocked)# +
    + #(listedincrawler)#::
  • Crawling
  • #(/listedincrawler)# + #(listedindht)#::
  • DHT
  • #(/listedindht)# + #(listedinnews)#::
  • News
  • #(/listedinnews)# + #(listedinproxy)#::
  • Proxy
  • #(/listedinproxy)# + #(listedinsearch)#::
  • Search
  • #(/listedinsearch)# + #(listedinsurftips)#::
  • Surftips
  • #(/listedinsurftips)# +
+
+ :: +

+
The tested URL was not valid.
+ #(/testlist)# +
+
+ #%env/templates/footer.template%# + + diff --git a/htroot/BlacklistTest_p.java b/htroot/BlacklistTest_p.java index 4faec57c1..432cb9cbf 100644 --- a/htroot/BlacklistTest_p.java +++ b/htroot/BlacklistTest_p.java @@ -1,96 +1,114 @@ -// BlacklistTest_p.java -// ----------------------- -// part of YaCy -// (C) by Michael Peter Christen; mc@yacy.net -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// -// This File is contributed by Alexander Schier -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -// You must compile this file with -// javac -classpath .:../classes Blacklist_p.java -// if the shell's current path is HTROOT - -import java.io.File; -import java.net.MalformedURLException; - -import net.yacy.cora.protocol.RequestHeader; -import net.yacy.data.ListManager; -import net.yacy.kelondro.data.meta.DigestURI; -import net.yacy.repository.Blacklist; -import net.yacy.repository.Blacklist.BlacklistType; -import net.yacy.search.Switchboard; -import net.yacy.server.serverObjects; -import net.yacy.server.serverSwitch; - -public class BlacklistTest_p { - - public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { - - // initialize the list manager - ListManager.switchboard = (Switchboard) env; - ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); - - final serverObjects prop = new serverObjects(); - prop.putHTML("blacklistEngine", Blacklist.getEngineInfo()); - - // do all post operations - if(post != null && post.containsKey("testList")) { - prop.put("testlist", "1"); - String urlstring = post.get("testurl", ""); - if (!urlstring.startsWith("http://") && - !urlstring.startsWith("https://") && - !urlstring.startsWith("ftp://") && - !urlstring.startsWith("smb://") && - !urlstring.startsWith("file://")) urlstring = "http://" + urlstring; - DigestURI testurl = null; - try { - testurl = new DigestURI(urlstring); - } catch (final MalformedURLException e) { - testurl = null; - } - if(testurl != null) { - prop.putHTML("url",testurl.toString()); - prop.putHTML("testlist_url",testurl.toString()); - if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) - 
prop.put("testlist_listedincrawler", "1"); - if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) - prop.put("testlist_listedindht", "1"); - if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) - prop.put("testlist_listedinnews", "1"); - if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) - prop.put("testlist_listedinproxy", "1"); - if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) - prop.put("testlist_listedinsearch", "1"); - if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) - prop.put("testlist_listedinsurftips", "1"); - } - else { - prop.putHTML("url",urlstring); - prop.put("testlist", "2"); - } - } else { - prop.putHTML("url", "http://"); - } - return prop; - } - -} +// BlacklistTest_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// +// $LastChangedDate$ +// $LastChangedRevision$ +// $LastChangedBy$ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +// You must compile this file with +// javac -classpath .:../classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.io.File; +import java.net.MalformedURLException; + +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.data.ListManager; +import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; +import net.yacy.search.Switchboard; +import net.yacy.server.serverObjects; +import net.yacy.server.serverSwitch; + +public class BlacklistTest_p { + + public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { + + // initialize the list manager + ListManager.switchboard = (Switchboard) env; + ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); + + final serverObjects prop = new serverObjects(); + prop.putHTML("blacklistEngine", Blacklist.getEngineInfo()); + + // do all post operations + if(post != null && post.containsKey("testList")) { + prop.put("testlist", "1"); + String urlstring = post.get("testurl", ""); + if (!urlstring.startsWith("http://") && + !urlstring.startsWith("https://") && + !urlstring.startsWith("ftp://") && + !urlstring.startsWith("smb://") && + !urlstring.startsWith("file://")) urlstring = "http://" + urlstring; + DigestURI testurl = null; + try { + testurl = new DigestURI(urlstring); + } catch (final MalformedURLException e) { + testurl = null; + } + if(testurl != null) { + prop.putHTML("url",testurl.toString()); + prop.putHTML("testlist_url",testurl.toString()); + boolean isblocked = false; + + if 
(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) { + prop.put("testlist_listedincrawler", "1"); + isblocked = true; + } + if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) { + prop.put("testlist_listedindht", "1"); + isblocked = true; + } + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) { + prop.put("testlist_listedinnews", "1"); + isblocked = true; + } + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) { + prop.put("testlist_listedinproxy", "1"); + isblocked = true; + } + if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) { + prop.put("testlist_listedinsearch", "1"); + isblocked = true; + } + if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) { + prop.put("testlist_listedinsurftips", "1"); + isblocked = true; + } + + if (!isblocked) { + prop.put("testlist_isnotblocked", "1"); + } + } + else { + prop.putHTML("url",urlstring); + prop.put("testlist", "2"); + } + } else { + prop.putHTML("url", "http://"); + } + return prop; + } + +} diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 9ca6fae13..61ec2bd28 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -215,7 +215,7 @@ public class Blacklist { loadedPathsPattern = new ArrayList(); for (String a: loadedPaths) { if (a.equals("*")) { - loadedPathsPattern.add(Pattern.compile(".*")); + loadedPathsPattern.add(Pattern.compile("(?i).*")); continue; } if (a.indexOf("?*",0) > 0) { @@ -223,7 +223,7 @@ public class Blacklist { Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + a); continue; } - loadedPathsPattern.add(Pattern.compile(a)); + loadedPathsPattern.add(Pattern.compile("(?i)" + a)); // add case insensitive regex } // create new entry if host mask unknown, otherwise merge @@ -295,7 +295,7 @@ public class Blacklist { blacklistMap.put(h, (hostList = 
new ArrayList())); } - hostList.add(Pattern.compile(p)); + hostList.add(Pattern.compile("(?i)" + p)); // add case insensitive regex } public int blacklistCacheSize() {