From 809b4e1fd9b724334d9140a5ae26df881bcd27e6 Mon Sep 17 00:00:00 2001 From: Marc Nause Date: Tue, 25 Mar 2014 22:14:54 +0100 Subject: [PATCH] Team added support for URLs with unicode characters in host part to blacklist. Punycode is used to handle unicode characters. --- htroot/BlacklistCleaner_p.java | 267 ++++++++++++------ htroot/Blacklist_p.java | 20 +- htroot/CrawlResults.java | 18 +- htroot/IndexControlRWIs_p.java | 35 ++- .../cora/document/id/MultiProtocolURL.java | 34 ++- source/net/yacy/repository/Blacklist.java | 32 ++- .../net/yacy/search/snippet/MediaSnippet.java | 24 +- 7 files changed, 286 insertions(+), 144 deletions(-) diff --git a/htroot/BlacklistCleaner_p.java b/htroot/BlacklistCleaner_p.java index 2beb6efe3..dae351055 100644 --- a/htroot/BlacklistCleaner_p.java +++ b/htroot/BlacklistCleaner_p.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import net.yacy.cora.document.id.Punycode.PunycodeException; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.ListManager; @@ -55,84 +56,122 @@ import net.yacy.server.serverSwitch; public class BlacklistCleaner_p { + /** Used for logging. 
*/ + private static final String APP_NAME = "BLACKLIST-CLEANER"; + private static final String RESULTS = "results_"; private static final String DISABLED = "disabled_"; private static final String BLACKLISTS = "blacklists_"; private static final String ENTRIES = "entries_"; - public static final Class[] supportedBLEngines = { - Blacklist.class - }; + public static final Class[] supportedBLEngines = { Blacklist.class }; - public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) { + public static serverObjects respond( + @SuppressWarnings("unused") final RequestHeader header, + final serverObjects post, + @SuppressWarnings("unused") final serverSwitch env) { final serverObjects prop = new serverObjects(); String blacklistToUse = null; - prop.put(DISABLED+"checked", "1"); + prop.put(DISABLED + "checked", "1"); if (post != null) { - final boolean allowRegex = post.get("allowRegex", "off").equalsIgnoreCase("on") ? true: false; - prop.put(DISABLED+"checked", (allowRegex) ? "1" : "0"); + final boolean allowRegex = post.get("allowRegex", "off") + .equalsIgnoreCase("on") ? true : false; + prop.put(DISABLED + "checked", (allowRegex) ? 
"1" : "0"); if (post.containsKey("listNames")) { blacklistToUse = post.get("listNames"); - if (blacklistToUse.isEmpty() || !ListManager.listSetContains("listManager.listsPath", blacklistToUse)) { + if (blacklistToUse.isEmpty() + || !ListManager.listSetContains( + "listManager.listsPath", + blacklistToUse)) { prop.put("results", "2"); } } - putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse); + putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, + Blacklist.BLACKLIST_FILENAME_FILTER), + blacklistToUse); if (blacklistToUse != null) { prop.put("results", "1"); if (post.containsKey("delete")) { prop.put(RESULTS + "modified", "1"); - prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true))); + prop.put(RESULTS + "modified_delCount", + removeEntries(blacklistToUse, + BlacklistType.values(), + getKeysByPrefix(post, + "select", + true))); } else if (post.containsKey("alter")) { prop.put(RESULTS + "modified", "2"); - prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); + prop.put(RESULTS + "modified_alterCount", + alterEntries(blacklistToUse, + BlacklistType.values(), + getKeysByPrefix(post, + "select", + false), + getValuesByPrefix(post, + "entry", + false))); } // list illegal entries - final Map illegalEntries = getIllegalEntries(blacklistToUse, Switchboard.urlBlacklist, allowRegex); + final Map illegalEntries = getIllegalEntries( + blacklistToUse, Switchboard.urlBlacklist, + allowRegex); prop.put(RESULTS + "blList", blacklistToUse); prop.put(RESULTS + "entries", illegalEntries.size()); prop.putHTML(RESULTS + "blEngine", Blacklist.getEngineInfo()); - prop.put(RESULTS + "disabled", (illegalEntries.isEmpty()) ? "1" : "0"); + prop.put(RESULTS + "disabled", (illegalEntries.isEmpty()) ? 
"1" + : "0"); if (!illegalEntries.isEmpty()) { - prop.put(RESULTS + DISABLED + "entries", illegalEntries.size()); + prop.put(RESULTS + DISABLED + "entries", + illegalEntries.size()); int i = 0; String key; - for (final Entry entry : illegalEntries.entrySet()) { + for (final Entry entry : illegalEntries + .entrySet()) { key = entry.getKey(); - prop.put(RESULTS + DISABLED + ENTRIES + i + "_error", entry.getValue().getLong()); - prop.putHTML(RESULTS + DISABLED + ENTRIES + i + "_entry", key); + prop.put(RESULTS + DISABLED + ENTRIES + i + "_error", + entry.getValue().getLong()); + prop.putHTML(RESULTS + DISABLED + ENTRIES + i + + "_entry", key); i++; } } } } else { prop.put("results", "0"); - putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse); + putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, + Blacklist.BLACKLIST_FILENAME_FILTER), + blacklistToUse); } return prop; } /** - * Adds a list of blacklist to the server objects properties which are used to - * display the blacklist in the HTML page belonging to this servlet. - * @param prop Server objects properties object. - * @param lists List of blacklists. - * @param selected Element in list of blacklists which will be preselected in HTML. + * Adds a list of blacklist to the server objects properties which are used + * to display the blacklist in the HTML page belonging to this servlet. + * + * @param prop + * Server objects properties object. + * @param lists + * List of blacklists. + * @param selected + * Element in list of blacklists which will be preselected in + * HTML. 
*/ - private static void putBlacklists(final serverObjects prop, final List lists, final String selected) { + private static void putBlacklists(final serverObjects prop, + final List lists, final String selected) { boolean supported = false; - for (int i=0; i < supportedBLEngines.length && !supported; i++) { + for (int i = 0; i < supportedBLEngines.length && !supported; i++) { supported |= (Switchboard.urlBlacklist.getClass() == supportedBLEngines[i]); } @@ -143,7 +182,8 @@ public class BlacklistCleaner_p { int count = 0; for (final String list : lists) { prop.putHTML(DISABLED + BLACKLISTS + count + "_name", list); - prop.put(DISABLED + BLACKLISTS + count + "_selected", (list.equals(selected)) ? "1" : "0"); + prop.put(DISABLED + BLACKLISTS + count + "_selected", + (list.equals(selected)) ? "1" : "0"); count++; } } else { @@ -152,48 +192,68 @@ public class BlacklistCleaner_p { } else { prop.put("disabled", "1"); for (int i = 0; i < supportedBLEngines.length; i++) { - prop.putHTML(DISABLED + "engines_" + i + "_name", supportedBLEngines[i].getName()); + prop.putHTML(DISABLED + "engines_" + i + "_name", + supportedBLEngines[i].getName()); } prop.put(DISABLED + "engines", supportedBLEngines.length); } } /** - * Retrieves all keys with a certain prefix from the data which has been sent and returns them as an array. This - * method is only a wrapper for {@link getByPrefix(de.anomic.server.serverObjects, java.lang.String, boolean, boolean)} - * which has been created to make it easier to understand the code. - * @param post All POST values. - * @param prefix Prefix by which the input is filtered. - * @param filterDoubles Set true if only unique results shall be returned, else false. + * Retrieves all keys with a certain prefix from the data which has been + * sent and returns them as an array. 
This method is only a wrapper for + * {@link getByPrefix(de.anomic.server.serverObjects, java.lang.String, + * boolean, boolean)} which has been created to make it easier to understand + * the code. + * + * @param post + * All POST values. + * @param prefix + * Prefix by which the input is filtered. + * @param filterDoubles + * Set true if only unique results shall be returned, else false. * @return Keys which have been posted. */ - private static String[] getKeysByPrefix(final serverObjects post, final String prefix, final boolean filterDoubles) { + private static String[] getKeysByPrefix(final serverObjects post, + final String prefix, final boolean filterDoubles) { return getByPrefix(post, prefix, true, filterDoubles); } /** - * Retrieves all values with a certain prefix from the data which has been sent and returns them as an array. This - * method is only a wrapper for {@link getByPrefix(de.anomic.server.serverObjects, java.lang.String, boolean, boolean)}. - * @param post All POST values. - * @param prefix Prefix by which the input is filtered. - * @param filterDoubles Set true if only unique results shall be returned, else false. + * Retrieves all values with a certain prefix from the data which has been + * sent and returns them as an array. This method is only a wrapper for + * {@link getByPrefix(de.anomic.server.serverObjects, java.lang.String, + * boolean, boolean)}. + * + * @param post + * All POST values. + * @param prefix + * Prefix by which the input is filtered. + * @param filterDoubles + * Set true if only unique results shall be returned, else false. * @return Values which have been posted. 
*/ - private static String[] getValuesByPrefix(final serverObjects post, final String prefix, final boolean filterDoubles) { + private static String[] getValuesByPrefix(final serverObjects post, + final String prefix, final boolean filterDoubles) { return getByPrefix(post, prefix, false, filterDoubles); } /** - * Method which does all the work for {@link getKeysByPrefix(de.anomic.server.serverObjects, java.lang.String prefix, boolean)} - * and {@link getValuesByPrefix(de.anomic.server.serverObjects, java.lang.String prefix, boolean)} which - * have been crested to make it easier to understand the code. + * Method which does all the work for {@link + * getKeysByPrefix(de.anomic.server.serverObjects, java.lang.String prefix, + * boolean)} and {@link getValuesByPrefix(de.anomic.server.serverObjects, + * java.lang.String prefix, boolean)} which have been created to make it + * easier to understand the code. + * * @param post * @param prefix * @param useKeys * @param useHashSet * @return */ - private static String[] getByPrefix(final serverObjects post, final String prefix, final boolean useKeys, final boolean useHashSet) { + private static String[] getByPrefix(final serverObjects post, + final String prefix, final boolean useKeys, + final boolean useHashSet) { Collection r; if (useHashSet) { r = new HashSet(); @@ -220,19 +280,27 @@ public class BlacklistCleaner_p { /** * Finds illegal entries in black list. - * @param blacklistToUse The blacklist to be checked. - * @param blEngine The blacklist engine which is used to check - * @param allowRegex Set to true to allow regular expressions in host part of blacklist entry. - * @return A map which contains all entries whoch have been identified as being - * illegal by the blacklistEngine with the entry as key and an error code as - * value. + * + * @param blacklistToUse + * The blacklist to be checked. 
+ * @param blEngine + * The blacklist engine which is used to check + * @param allowRegex + * Set to true to allow regular expressions in host part of + * blacklist entry. + * @return A map which contains all entries which have been identified as + * being illegal by the blacklistEngine with the entry as key and an + * error code as value. */ - private static Map getIllegalEntries(final String blacklistToUse, final Blacklist blEngine, final boolean allowRegex) { + private static Map getIllegalEntries( + final String blacklistToUse, final Blacklist blEngine, + final boolean allowRegex) { final Map illegalEntries = new HashMap(); final Set legalEntries = new HashSet(); - final List list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); - final Map properties= new HashMap(); + final List list = FileUtils.getListArray(new File( + ListManager.listsPath, blacklistToUse)); + final Map properties = new HashMap(); properties.put("allowRegex", String.valueOf(allowRegex)); BlacklistError err = BlacklistError.NO_ERROR; @@ -259,30 +327,42 @@ public class BlacklistCleaner_p { /** * Removes existing entries from a blacklist. - * @param blacklistToUse The blacklist which contains the - * @param supportedBlacklistTypes Types of blacklists which the entry is to changed in. - * @param entries Array of entries to be deleted. + * + * @param blacklistToUse + * The blacklist which contains the entries. + * @param supportedBlacklistTypes + * Types of blacklists which the entry is to be changed in. + * @param entries + * Array of entries to be deleted. * @return Length of the list of entries to be removed. 
*/ - private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) { + private static int removeEntries(final String blacklistToUse, + final BlacklistType[] supportedBlacklistTypes, + final String[] entries) { for (final String entry : entries) { String s = entry; // get rid of escape characters which make it impossible to // properly use contains() if (s.contains("\\\\")) { - s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\")); + s = s.replaceAll(Pattern.quote("\\\\"), + Matcher.quoteReplacement("\\")); } // remove the entry from the running blacklist engine for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { - if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { - final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0)); - final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1); + if (ListManager.listSetContains(supportedBlacklistType + + ".BlackLists", blacklistToUse)) { + final String host = (s.indexOf('/', 0) == -1) ? s : s + .substring(0, s.indexOf('/', 0)); + final String path = (s.indexOf('/', 0) == -1) ? ".*" : s + .substring(s.indexOf('/', 0) + 1); try { - Switchboard.urlBlacklist.remove(supportedBlacklistType, blacklistToUse, host, path); + Switchboard.urlBlacklist.remove(supportedBlacklistType, + blacklistToUse, host, path); } catch (final RuntimeException e) { - ConcurrentLog.severe("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path); + ConcurrentLog.severe(APP_NAME, e.getMessage() + ": " + + host + "/" + path); } } } @@ -293,35 +373,46 @@ public class BlacklistCleaner_p { /** * Changes existing entry in a blacklist. - * @param blacklistToUse The blacklist which contains the entry. - * @param supportedBlacklistTypes Types of blacklists which the entry is to changed in. - * @param oldEntry Entry to be changed. 
- * @param newEntry Changed entry. + * + * @param blacklistToUse + * The blacklist which contains the entry. + * @param supportedBlacklistTypes + * Types of blacklists which the entry is to changed in. + * @param oldEntry + * Entry to be changed. + * @param newEntry + * Changed entry. * @return The length of the new entry. */ - private static int alterEntries( - final String blacklistToUse, - final BlacklistType[] supportedBlacklistTypes, - final String[] oldEntry, - final String[] newEntry) { + private static int alterEntries(final String blacklistToUse, + final BlacklistType[] supportedBlacklistTypes, + final String[] oldEntry, final String[] newEntry) { removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry); String host, path; - for (final String n : newEntry) { - final int pos = n.indexOf('/',0); - if (pos < 0) { - host = n; - path = ".*"; - } else { - host = n.substring(0, pos); - path = n.substring(pos + 1); - } - for (final BlacklistType s : supportedBlacklistTypes) { - if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) { - Switchboard.urlBlacklist.add(s, blacklistToUse, host, path); - } - } - SearchEventCache.cleanupEvents(true); - } + for (final String n : newEntry) { + final int pos = n.indexOf('/', 0); + if (pos < 0) { + host = n; + path = ".*"; + } else { + host = n.substring(0, pos); + path = n.substring(pos + 1); + } + for (final BlacklistType s : supportedBlacklistTypes) { + if (ListManager.listSetContains(s + ".BlackLists", + blacklistToUse)) { + try { + Switchboard.urlBlacklist.add(s, blacklistToUse, host, + path); + } catch (PunycodeException e) { + ConcurrentLog.warn(APP_NAME, + "Unable to add blacklist entry to blacklist " + + s, e); + } + } + } + SearchEventCache.cleanupEvents(true); + } return newEntry.length; } } diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 55d844ef9..500e5f997 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -34,6 +34,9 @@ import java.io.IOException; 
import java.util.Arrays; import java.util.List; +import org.eclipse.jetty.util.log.Log; + +import net.yacy.cora.document.id.Punycode.PunycodeException; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; @@ -48,13 +51,16 @@ import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; public class Blacklist_p { + + /** Used for logging. */ + private static final String APP_NAME = "Blacklist"; + private final static String EDIT = "edit_"; private final static String DISABLED = "disabled_"; private final static String BLACKLIST = "blackLists_"; private final static String BLACKLIST_MOVE = "blackListsMove_"; private final static String BLACKLIST_SHARED = "BlackLists.Shared"; - - + public static serverObjects respond(final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) { // load all blacklist files located in the directory @@ -134,7 +140,7 @@ public class Blacklist_p { final File blackListFile = new File(ListManager.listsPath, blacklistToUse); if(!blackListFile.delete()) { - ConcurrentLog.warn("Blacklist", "file "+ blackListFile +" could not be deleted!"); + ConcurrentLog.warn(APP_NAME, "file "+ blackListFile +" could not be deleted!"); } for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { @@ -551,7 +557,7 @@ public class Blacklist_p { // ignore empty entries if(newEntry == null || newEntry.isEmpty()) { - ConcurrentLog.warn("Blacklist", "skipped adding an empty entry"); + ConcurrentLog.warn(APP_NAME, "skipped adding an empty entry"); return ""; } @@ -582,7 +588,11 @@ public class Blacklist_p { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) { - Switchboard.urlBlacklist.add(supportedBlacklistType, blacklistToUse, host, path); + try { + Switchboard.urlBlacklist.add(supportedBlacklistType, 
blacklistToUse, host, path); + } catch (PunycodeException e) { + ConcurrentLog.warn(APP_NAME, "Unable to add blacklist entry to blacklist " + supportedBlacklistType, e); + } } } diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 50a57e53a..e23a9f19f 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -36,6 +36,7 @@ import java.util.Set; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; +import net.yacy.cora.document.id.Punycode.PunycodeException; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.ResultURLs; @@ -54,6 +55,9 @@ import net.yacy.utils.nxTools; public class CrawlResults { + /** Used for logging. */ + private static final String APP_NAME = "PLASMA"; + public static serverObjects respond(final RequestHeader header, serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; @@ -139,7 +143,12 @@ public class CrawlResults { // handle addtoblacklist if (post.containsKey("delandaddtoblacklist")) { - Switchboard.urlBlacklist.add(selectedblacklist, domain, ".*"); + try { + Switchboard.urlBlacklist.add(selectedblacklist, domain, ".*"); + } catch (PunycodeException e) { + ConcurrentLog.warn(APP_NAME, "Unable to add blacklist entry to blacklist " + selectedblacklist, e); + + } } } } @@ -204,7 +213,7 @@ public class CrawlResults { urle = sb.index.fulltext().getMetadata(urlhash); } if (urle == null) { - ConcurrentLog.warn("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey()); + ConcurrentLog.warn(APP_NAME, "CrawlResults: URL not in index with url hash " + entry.getKey()); urlstr = null; urltxt = null; continue; @@ -291,7 +300,7 @@ public class CrawlResults { dark = !dark; cnt++; } catch (final Exception e) { - ConcurrentLog.severe("PLASMA", "genTableProps", e); + ConcurrentLog.severe(APP_NAME, "genTableProps", e); } } 
prop.put("table_indexed", cnt); @@ -331,9 +340,6 @@ public class CrawlResults { prop.put("table_blacklists", blacklistCount); } } - - - prop.put("process", tabletype.getCode()); // return rewrite properties diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 1774b0fad..0c8d08147 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -35,6 +35,7 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.cora.document.id.Punycode.PunycodeException; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; @@ -70,6 +71,8 @@ import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; public class IndexControlRWIs_p { + + private static final String APP_NAME = "IndexControlRWIs_p"; private final static String errmsg = "not possible to compute word from hash"; @@ -381,11 +384,17 @@ public class IndexControlRWIs_p { if ( ListManager.listSetContains( supportedBlacklistType + ".BlackLists", blacklist) ) { - Switchboard.urlBlacklist.add( - BlacklistType.valueOf(supportedBlacklistType), - blacklist, - url.getHost(), - url.getFile()); + try { + Switchboard.urlBlacklist.add( + BlacklistType.valueOf(supportedBlacklistType), + blacklist, + url.getHost(), + url.getFile()); + } catch (PunycodeException e) { + ConcurrentLog.warn(APP_NAME, + "Unable to add blacklist entry to blacklist " + + supportedBlacklistType, e); + } } } SearchEventCache.cleanupEvents(true); @@ -408,11 +417,17 @@ public class IndexControlRWIs_p { if ( ListManager.listSetContains( supportedBlacklistType + ".BlackLists", blacklist) ) { - Switchboard.urlBlacklist.add( - supportedBlacklistType, - blacklist, - url.getHost(), - ".*"); + try { + Switchboard.urlBlacklist.add( + 
supportedBlacklistType, + blacklist, + url.getHost(), + ".*"); + } catch (PunycodeException e) { + ConcurrentLog.warn(APP_NAME, + "Unable to add blacklist entry to blacklist " + + supportedBlacklistType, e); + } } } } diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 2e3c5078c..70f859c87 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -240,24 +240,28 @@ public class MultiProtocolURL implements Serializable, Comparable> blacklistMap = getBlacklistMap(blacklistType, isMatchable(host)); // avoid PatternSyntaxException e - final String h = ((!isMatchable(host) && !host.isEmpty() && host.charAt(0) == '*') ? "." + host : host).toLowerCase(); + final String h = ((!isMatchable(safeHost) && !safeHost.isEmpty() && safeHost.charAt(0) == '*') ? "." + safeHost : safeHost).toLowerCase(); if (!p.isEmpty() && p.charAt(0) == '*') { p = "." + p; } @@ -356,13 +370,14 @@ public class Blacklist { } /** - * appends a entry to the backlist source file + * appends aN entry to the backlist source file. * * @param blacklistSourcefile name of the blacklist file (LISTS/*.black) * @param host host or host pattern * @param path path or path pattern + * @throws PunycodeException */ - public final void add (final String blacklistSourcefile, final String host, final String path) { + public final void add (final String blacklistSourcefile, final String host, final String path) throws PunycodeException { // TODO: check sourcefile synced with cache.ser files ? if (host == null) { throw new IllegalArgumentException("host may not be null"); @@ -374,7 +389,10 @@ public class Blacklist { String p = (!path.isEmpty() && path.charAt(0) == '/') ? path.substring(1) : path; // avoid PatternSyntaxException e - final String h = ((!isMatchable(host) && !host.isEmpty() && host.charAt(0) == '*') ? "." 
+ host : host).toLowerCase(); + String h = ((!isMatchable(host) && !host.isEmpty() && host.charAt(0) == '*') ? "." + host : host).toLowerCase(); + + h = Punycode.isBasic(h) ? h : MultiProtocolURL.toPunycode(h); + if (!p.isEmpty() && p.charAt(0) == '*') { p = "." + p; } diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java index f33ceb099..4cffbfdfa 100644 --- a/source/net/yacy/search/snippet/MediaSnippet.java +++ b/source/net/yacy/search/snippet/MediaSnippet.java @@ -27,7 +27,6 @@ package net.yacy.search.snippet; import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; -import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -58,7 +57,6 @@ import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; -@SuppressWarnings("unused") public class MediaSnippet implements Comparable, Comparator { public ContentDomain type; public DigestURL href, source; @@ -249,24 +247,24 @@ public class MediaSnippet implements Comparable, Comparator