Decode blacklist entries for easier editing of non-ASCII characters

The JDK URLDecoder.decode() function is not used, as it strips '+'
characters when they occur after '?' (both characters have regular
expression semantics when used in blacklist path patterns)
pull/250/head
luccioman 6 years ago
parent ed93221fa1
commit 61c337f29a

@ -419,10 +419,21 @@ public class Blacklist_p {
} }
for (int j = offset; j < to; ++j){ for (int j = offset; j < to; ++j){
final String nextEntry = sortedlist[j]; String nextEntry = sortedlist[j];
if (nextEntry.isEmpty()) {
continue;
}
if (nextEntry.charAt(0) == '#') {
continue;
}
/** Decode the entry for easier reading of paths with non ascii characters */
final int slashPos = nextEntry.indexOf('/', 0);
if(slashPos > 0) {
nextEntry = nextEntry.substring(0, slashPos + 1) + MultiProtocolURL.unescapePath(nextEntry.substring(slashPos + 1));
}
if (nextEntry.isEmpty()) continue;
if (nextEntry.charAt(0) == '#') continue;
prop.put(DISABLED + EDIT + "Itemlist_" + entryCount + "_dark", dark ? "1" : "0"); prop.put(DISABLED + EDIT + "Itemlist_" + entryCount + "_dark", dark ? "1" : "0");
dark = !dark; dark = !dark;
/* We do not use here putHTML as we don't want '+' characters to be interpreted as application/x-www-form-urlencoded encoding */ /* We do not use here putHTML as we don't want '+' characters to be interpreted as application/x-www-form-urlencoded encoding */

@ -37,6 +37,7 @@ import java.io.UnsupportedEncodingException;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.BitSet; import java.util.BitSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
@ -707,6 +708,60 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return pathToEscape; return pathToEscape;
} }
/**
 * Decode the UTF-8 percent-encoded characters possibly found in the given path.
 * <p>
 * Differences with {@link URLDecoder#decode(String, String)} :
 * </p>
 * <ul>
 * <li>the '+' character is not decoded to a space character</li>
 * <li>no exception is thrown when invalid hexadecimal digits are found after a
 * '%' character : the '%' and the characters following it are kept as is</li>
 * </ul>
 * <p>
 * Consecutive percent-encoded bytes are decoded together as one UTF-8 sequence.
 * Bytes that do not form valid UTF-8 are replaced by the charset default
 * replacement character, as done by
 * {@link String#String(byte[], int, int, java.nio.charset.Charset)}.
 * </p>
 *
 * @param escaped a URL path, possibly percent-encoded. May be null.
 * @return the unescaped path, the given instance itself when it contains
 *         nothing to decode, or null when escaped is null.
 */
public static final String unescapePath(final String escaped) {
    if (escaped == null) {
        return null;
    }
    final int len = escaped.length();
    boolean modified = false;
    final StringBuilder unescaped = new StringBuilder(len > 500 ? len / 2 : len);
    /* Lazily allocated : most paths contain no percent-encoded character at all */
    ByteBuffer utf8Bytes = null;
    int i = 0;
    while (i < len) {
        final char ch = escaped.charAt(i);
        if (ch == '%' && (i + 2) < len) {
            final int high = hexDigitValue(escaped.charAt(i + 1));
            final int low = hexDigitValue(escaped.charAt(i + 2));
            if (high >= 0 && low >= 0) {
                if (utf8Bytes == null) {
                    /* Each encoded byte takes 3 characters : this is the maximum that can follow */
                    utf8Bytes = ByteBuffer.allocate((len - i) / 3);
                }
                /* One byte of a percent-encoded UTF-8 sequence */
                utf8Bytes.put((byte) ((high << 4) | low));
                modified = true;
                i += 3;
                continue;
            }
        }
        /*
         * Literal character (including a '%' not followed by two hexadecimal
         * digits) : the bytes decoded so far must be flushed BEFORE appending it,
         * otherwise the literal would end up in front of the characters that
         * precede it in the input.
         */
        if (utf8Bytes != null && utf8Bytes.position() > 0) {
            unescaped.append(new String(utf8Bytes.array(), 0, utf8Bytes.position(), StandardCharsets.UTF_8));
            utf8Bytes.clear();
        }
        unescaped.append(ch);
        i++;
    }
    /* Flush the bytes of a percent-encoded sequence ending the path */
    if (utf8Bytes != null && utf8Bytes.position() > 0) {
        unescaped.append(new String(utf8Bytes.array(), 0, utf8Bytes.position(), StandardCharsets.UTF_8));
    }
    return modified ? unescaped.toString() : escaped;
}

/**
 * @param character a character to evaluate
 * @return the value (0 to 15) of the given ASCII hexadecimal digit, or -1 when
 *         the character is not an ASCII hexadecimal digit
 */
private static int hexDigitValue(final char character) {
    if (character >= '0' && character <= '9') {
        return character - '0';
    }
    if (character >= 'a' && character <= 'f') {
        return character - 'a' + 10;
    }
    if (character >= 'A' && character <= 'F') {
        return character - 'A' + 10;
    }
    return -1;
}
/** /**
* @param character a character to test * @param character a character to test
* @return true when the character is a valid hexadecimal digit * @return true when the character is a valid hexadecimal digit

@ -275,51 +275,77 @@ public class Blacklist {
public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) { public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true); final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
Set<Pattern> hostList = blacklistMap.get(host); removePatternFromMap(host, path, blacklistMap);
if (hostList != null) {
// remove pattern from list (by comparing patternstring with path, remove(path) will not match path) final Map<String, Set<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
for (Pattern hp : hostList) { removePatternFromMap(host, path, blacklistMapNotMatch);
String hpxs = hp.pattern();
if (hpxs.equals(path)) { //TODO: check if delete from blacklist is desired, on reload entry will not be available in any blacklist
hostList.remove(hp); // even if remove (above) from internal maps (at runtime) is only done for given blacklistType
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
/* delete the old entry from file, in any normalized or not normalized possible combinations */
final Set<String> entriesToDelete = new HashSet<>();
final String normalizedPathPattern = MultiProtocolURL.escapePathPattern(path);
entriesToDelete.add(host + "/" + path);
entriesToDelete.add(host + "/" + normalizedPathPattern);
if (!Punycode.isBasic(host)) {
try {
final String normalizedHost = MultiProtocolURL.toPunycode(host);
entriesToDelete.add(normalizedHost + "/" + path);
entriesToDelete.add(normalizedHost + "/" + normalizedPathPattern);
} catch (final PunycodeException ignored) {
/* We continue even if a punycode flavor can not be produced */
}
}
if (list != null) {
for (final String e : list) {
if (entriesToDelete.contains(e)) {
list.remove(e);
break; break;
} }
} }
if (hostList.isEmpty()) { FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
blacklistMap.remove(host);
} }
} }
final Map<String, Set<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false); /**
hostList = blacklistMapNotMatch.get(host); * Remove the (host, pathPattern) entries eventually found in the given
* blacklist map.
*
* @param host the host part of the entry to remove
* @param pathPattern the path pattern part of the entry to remove
* @param blacklistMap a blacklist map to update
*/
private void removePatternFromMap(final String host, final String pathPattern,
final Map<String, Set<Pattern>> blacklistMap) {
final String normalizedPathPattern = MultiProtocolURL.escapePathPattern(pathPattern);
final Set<String> hosts = new HashSet<>();
hosts.add(host);
if (!Punycode.isBasic(host)) {
try {
hosts.add(MultiProtocolURL.toPunycode(host));
} catch (final PunycodeException ignored) {
/* We continue even if a punycode flavor can not be produced */
}
}
for (final String hostKey : hosts) {
final Set<Pattern> hostList = blacklistMap.get(hostKey);
if (hostList != null) { if (hostList != null) {
// remove pattern from list // remove pattern from list (by comparing patternstring with path, remove(path)
// will not match path)
for (Pattern hp : hostList) { for (Pattern hp : hostList) {
String hpxs = hp.pattern(); String hpxs = hp.pattern();
if (hpxs.equals(path)) { if (hpxs.equals(pathPattern) || hpxs.equals(normalizedPathPattern)) {
hostList.remove(hp); hostList.remove(hp);
break; break;
} }
} }
if (hostList.isEmpty()) { if (hostList.isEmpty()) {
blacklistMapNotMatch.remove(host); blacklistMap.remove(host);
}
}
//TODO: check if delete from blacklist is desired, on reload entry will not be available in any blacklist
// even if remove (above) from internal maps (at runtime) is only done for given blacklistType
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
// delete the old entry from file
if (list != null) {
for (final String e : list) {
if (e.equals(host + "/" + path)) {
list.remove(e);
break;
} }
} }
FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
} }
} }

@ -391,6 +391,36 @@ public class MultiProtocolURLTest {
} }
} }
/**
 * Unit tests for {@link MultiProtocolURL#unescapePath(String)} : each escaped
 * sample path must be decoded to its expected unescaped form.
 */
@Test
public void testUnescapePath() {
    /* Each pair is { path to unescape, expected unescaped result } */
    final String[][] samples = {
            { "", "" },
            { "/", "/" },
            { "/ascii/path", "/ascii/path" },
            { "/latin/chars/%C3%A0%C3%A4%C3%A2%C3%A9%C3%A8%C3%AF%C3%AE%C3%B4%C3%B6%C3%B9",
                    "/latin/chars/àäâéèïîôöù" },
            { "/wiki/%25", "/wiki/%" },
            { "/logograms/%E6%AD%A3%E9%AB%94%E5%AD%97/%E7%B9%81%E9%AB%94%E5%AD%97",
                    "/logograms/正體字/繁體字" },
            /* Malformed or truncated escape sequences must be left untouched */
            { "/bad/hexaDigits/%GH%-1%èà/file", "/bad/hexaDigits/%GH%-1%èà/file" },
            { "/missing/hexaDigit/%2", "/missing/hexaDigit/%2" },
            { "/missing/hexaDigits/%", "/missing/hexaDigits/%" },
            /* Paths without any percent-encoding must come back unchanged */
            { "/unescaped/logograms/正體字/繁體字", "/unescaped/logograms/正體字/繁體字" },
            { "/unescaped/rfc3986/unreserved/path/chars/-._~",
                    "/unescaped/rfc3986/unreserved/path/chars/-._~" },
            { "/unescaped/rfc3986/subdelims/!$&'()*+,;=", "/unescaped/rfc3986/subdelims/!$&'()*+,;=" },
            { "/unescaped/rfc3986/pchar/additional/:@", "/unescaped/rfc3986/pchar/additional/:@" },
            { "/unescaped/regex/metacharacters/<([{\\^-=$!|]})?*+.>",
                    "/unescaped/regex/metacharacters/<([{\\^-=$!|]})?*+.>" } };
    for (final String[] sample : samples) {
        final String escapedPath = sample[0];
        final String expectedPath = sample[1];
        assertEquals(expectedPath, MultiProtocolURL.unescapePath(escapedPath));
    }
}
/** /**
* Unit tests for {@link MultiProtocolURL#escapePathPattern(String)} * Unit tests for {@link MultiProtocolURL#escapePathPattern(String)}
*/ */

Loading…
Cancel
Save