From bf6ce33da380dd5778b2639a83498605573de7cb Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 31 Jul 2016 23:16:24 +0200 Subject: [PATCH 1/4] Correct use of _htDocsPath config in YaCyDefaultServlet to use servlet config variable + add some javadoc and remove a not useful static declaration --- .../net/yacy/http/servlets/YaCyDefaultServlet.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index 1bc4b9dc7..99d431aad 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -290,7 +290,7 @@ public class YaCyDefaultServlet extends HttpServlet { if (!hasClass && (resource == null || !resource.exists()) && !pathInContext.contains("..")) { // try to get this in the alternative htDocsPath - resource = Resource.newResource(new File(HTTPDFileHandler.htDocsPath, pathInContext)); + resource = Resource.newResource(new File(_htDocsPath, pathInContext)); } if (ConcurrentLog.isFine("FILEHANDLER")) { @@ -1033,8 +1033,15 @@ public class YaCyDefaultServlet extends HttpServlet { } } } - - private static String appendPath(String proplist, String path) { + + /** + * Append a path string to comma separated string of pathes if not already + * contained in the proplist string + * @param proplist comma separated string of pathes + * @param path path to be appended + * @return comma separated string of pathes including param path + */ + private String appendPath(String proplist, String path) { if (proplist.length() == 0) return path; if (proplist.contains(path)) return proplist; return proplist + "," + path; From 1843ea7e69865d80aba7fb024b4dc54c2c2fc2a9 Mon Sep 17 00:00:00 2001 From: reger Date: Tue, 2 Aug 2016 02:41:03 +0200 Subject: [PATCH 2/4] on Blacklist.add pattern to source file also update internal entry maps as in Blacklist.add(blacklistType) to make entry effective 
w/o restart fix for http://mantis.tokeek.de/view.php?id=676 --- source/net/yacy/repository/Blacklist.java | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 0289ab86a..32bf2d9b3 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -297,9 +297,9 @@ public class Blacklist { } /** - * + * Adds entry to a given blacklist internal data and updates the source file * @param blacklistType - * @param blacklistToUse + * @param blacklistToUse source file * @param host * @param path * @throws PunycodeException @@ -362,7 +362,7 @@ public class Blacklist { } /** - * appends aN entry to the backlist source file. + * appends aN entry to the backlist source file and updates internal blacklist maps. * * @param blacklistSourcefile name of the blacklist file (LISTS/*.black) * @param host host or host pattern @@ -387,8 +387,21 @@ public class Blacklist { if (!p.isEmpty() && p.charAt(0) == '*') { p = "." + p; - } + } Pattern pattern = Pattern.compile(p, Pattern.CASE_INSENSITIVE); + + // update (put) pattern to internal blacklist maps (for which source is active) + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { + if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistSourcefile)) { + final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(supportedBlacklistType, isMatchable(host)); + Set<Pattern> hostList; + if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) { + blacklistMap.put(h, (hostList = new HashSet<Pattern>())); + } + hostList.add(pattern); + } + } + // Append the line to the file. 
PrintWriter pw = null; try { From 5e9e8711927e0beb08881f8096f877d8b82c785d Mon Sep 17 00:00:00 2001 From: reger Date: Wed, 3 Aug 2016 02:13:26 +0200 Subject: [PATCH 3/4] fix Blacklist.remove by using pattern.toString to find pattern to remove, parameter String path did never equal Pattern. + delete unused removeAll, as it does not persist changes after restart --- source/net/yacy/repository/Blacklist.java | 34 ++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 32bf2d9b3..8688d5e1e 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.PrintWriter; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -256,17 +257,27 @@ public class Blacklist { loadList(blFile, sep); } - public final void removeAll(final BlacklistType blacklistType, final String host) { - getBlacklistMap(blacklistType, true).remove(host); - getBlacklistMap(blacklistType, false).remove(host); - } - + /** + * remove the host/path from internal blacklist maps for given blacklistType + * !! and removes the entry from source blacklist file !! 
+ * @param blacklistType + * @param blacklistToUse + * @param host + * @param path + */ public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) { final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true); Set<Pattern> hostList = blacklistMap.get(host); if (hostList != null) { - hostList.remove(path); + // remove pattern from list (by comparing patternstring with path, remove(path) will not match path) + for (Pattern hp : hostList) { + String hpxs = hp.pattern(); + if (hpxs.equals(path)) { + hostList.remove(hp); + break; + } + } if (hostList.isEmpty()) { blacklistMap.remove(host); } @@ -275,12 +286,21 @@ public class Blacklist { final Map<String, Set<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false); hostList = blacklistMapNotMatch.get(host); if (hostList != null) { - hostList.remove(path); + // remove pattern from list + for (Pattern hp : hostList) { + String hpxs = hp.pattern(); + if (hpxs.equals(path)) { + hostList.remove(hp); + break; + } + } if (hostList.isEmpty()) { blacklistMapNotMatch.remove(host); } } + //TODO: check if delete from blacklist is desired, on reload entry will not be available in any blacklist + // even if remove (above) from internal maps (at runtime) is only done for given blacklistType // load blacklist data from file final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); From 5e335b32da26131e12728e8086ef3287aaeacb89 Mon Sep 17 00:00:00 2001 From: reger Date: Thu, 4 Aug 2016 01:12:49 +0200 Subject: [PATCH 4/4] fix Blacklist.contains() matching path pattern to string similar to https://github.com/yacy/yacy_search_server/commit/5e9e8711927e0beb08881f8096f877d8b82c785d + add proof testcase --- source/net/yacy/repository/Blacklist.java | 16 ++++++- .../net/yacy/repository/BlacklistTest.java | 43 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 test/java/net/yacy/repository/BlacklistTest.java diff --git 
a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 8688d5e1e..ce72e689e 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -466,6 +466,14 @@ public class Blacklist { return s != null && s.has(urlHash); } + /** + * Check blacklist to contain given host & path pattern. + * To check if a url matches a blacklist pattern, use isListed() + * @param blacklistType + * @param host + * @param path + * @return + */ public final boolean contains(final BlacklistType blacklistType, final String host, final String path) { boolean ret = false; @@ -477,7 +485,13 @@ public class Blacklist { final Set<Pattern> hostList = blacklistMap.get(h); if (hostList != null) { - ret = hostList.contains(path); + for (Pattern hp : hostList) { + String hpxs = hp.pattern(); + if (hpxs.equals(path)) { + ret = true; + break; + } + } } } return ret; diff --git a/test/java/net/yacy/repository/BlacklistTest.java b/test/java/net/yacy/repository/BlacklistTest.java new file mode 100644 index 000000000..3c48fbd58 --- /dev/null +++ b/test/java/net/yacy/repository/BlacklistTest.java @@ -0,0 +1,43 @@ +package net.yacy.repository; + +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; +import net.yacy.cora.document.id.Punycode; +import org.junit.Test; +import static org.junit.Assert.*; + +public class BlacklistTest { + + /** + * Simulates contains method, of class Blacklist as proof for pattern.toString + * needed and works + */ + @Test + public void testContains() throws Punycode.PunycodeException { + String path = ".*"; // simplest test pattern + + Pattern pattern = Pattern.compile(path, Pattern.CASE_INSENSITIVE); + + // pattern list as in Blacklist class + // ConcurrentMap<BlacklistType, Map<String, Set<Pattern>>> hostpaths_matchable; + // simulate last part, path pattern set + Set<Pattern> hostList = new HashSet<Pattern>(); + hostList.add(pattern); + + // proof assumption pattern(path) != path + boolean ret = hostList.contains(path); + 
assertFalse("match blacklist pattern " + path, ret); + + // proof pattern.toString match works + for (Pattern hp : hostList) { + String hpxs = hp.pattern(); + if (hpxs.equals(path)) { + ret = true; + break; + } + } + assertTrue("match blacklist pattern " + path, ret); + } + +}