Normalization of URLs using URL encoding/decoding

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8017 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent e58438c01c
commit 37e35f2741

@ -29,16 +29,14 @@
//if the shell's current path is HTROOT //if the shell's current path is HTROOT
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.util.Date; import java.util.Date;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
@ -98,11 +96,7 @@ public class QuickCrawlLink_p {
// get the URL // get the URL
String crawlingStart = post.get("url",null); String crawlingStart = post.get("url",null);
try { crawlingStart = UTF8.decodeURL(crawlingStart);
crawlingStart = URLDecoder.decode(crawlingStart, "UTF-8");
} catch (final UnsupportedEncodingException e) {
Log.logException(e);
}
// get the browser title // get the browser title
final String title = post.get("title",null); final String title = post.get("title",null);

@ -29,9 +29,7 @@
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.util.Collection; import java.util.Collection;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.Iterator; import java.util.Iterator;
@ -358,14 +356,12 @@ public class ViewFile {
if (words.length() > 1 && words.charAt(0) == '[' && words.charAt(words.length() - 1) == ']') { if (words.length() > 1 && words.charAt(0) == '[' && words.charAt(words.length() - 1) == ']') {
words = words.substring(1, words.length() - 1); words = words.substring(1, words.length() - 1);
} }
try { words = UTF8.decodeURL(words);
words = URLDecoder.decode(words, "UTF-8"); if (words.indexOf(' ') >= 0) return words.split(" ");
if (words.indexOf(' ') >= 0) return words.split(" "); if (words.indexOf(',') >= 0) return words.split(",");
if (words.indexOf(',') >= 0) return words.split(","); if (words.indexOf('+') >= 0) return words.split("\\+");
if (words.indexOf('+') >= 0) return words.split("\\+"); w = new String[1];
w = new String[1]; w[0] = words;
w[0] = words;
} catch (final UnsupportedEncodingException e) {}
return w; return w;
} }

@ -1,24 +1,24 @@
/* /*
robotsParser.java robotsParser.java
------------------------------------- -------------------------------------
part of YACY part of YACY
(C) 2005, 2006 by Alexander Schier (C) 2005, 2006 by Alexander Schier
Martin Thelian Martin Thelian
last change: $LastChangedDate$LastChangedBy: orbiter $ last change: $LastChangedDate$LastChangedBy: orbiter $
Revision: $LastChangedRevision$ Revision: $LastChangedRevision$
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General public License as published by it under the terms of the GNU General public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General public License for more details. GNU General public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -35,48 +35,49 @@ import java.io.BufferedReader;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Set; import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
/* /*
* A class for Parsing robots.txt files. * A class for Parsing robots.txt files.
* It only parses the Deny Part, yet. * It only parses the Deny Part, yet.
* *
* Robots RFC * Robots RFC
* http://www.robotstxt.org/wc/norobots-rfc.html * http://www.robotstxt.org/wc/norobots-rfc.html
* *
* TODO: * TODO:
* - On the request attempt resulted in temporary failure a robot * - On the request attempt resulted in temporary failure a robot
* should defer visits to the site until such time as the resource * should defer visits to the site until such time as the resource
* can be retrieved. * can be retrieved.
* *
* - Extended Standard for Robot Exclusion * - Extended Standard for Robot Exclusion
* See: http://www.conman.org/people/spc/robots2.html * See: http://www.conman.org/people/spc/robots2.html
* *
* - Robot Exclusion Standard Revisited * - Robot Exclusion Standard Revisited
* See: http://www.kollar.com/robots.html * See: http://www.kollar.com/robots.html
*/ */
public final class RobotsTxtParser { public final class RobotsTxtParser {
private static final Pattern patternTab = Pattern.compile("\t"); private static final Pattern patternTab = Pattern.compile("\t");
private static final String ROBOTS_USER_AGENT = "User-agent:".toUpperCase(); private static final String ROBOTS_USER_AGENT = "User-agent:".toUpperCase();
private static final String ROBOTS_DISALLOW = "Disallow:".toUpperCase(); private static final String ROBOTS_DISALLOW = "Disallow:".toUpperCase();
private static final String ROBOTS_ALLOW = "Allow:".toUpperCase(); private static final String ROBOTS_ALLOW = "Allow:".toUpperCase();
private static final String ROBOTS_COMMENT = "#"; private static final String ROBOTS_COMMENT = "#";
private static final String ROBOTS_SITEMAP = "Sitemap:".toUpperCase(); private static final String ROBOTS_SITEMAP = "Sitemap:".toUpperCase();
private static final String ROBOTS_CRAWL_DELAY = "Crawl-delay:".toUpperCase(); private static final String ROBOTS_CRAWL_DELAY = "Crawl-delay:".toUpperCase();
private final ArrayList<String> allowList; private final ArrayList<String> allowList;
private final ArrayList<String> denyList; private final ArrayList<String> denyList;
private String sitemap; private String sitemap;
private long crawlDelayMillis; private long crawlDelayMillis;
private final Set<String> myNames; // a list of own name lists private final Set<String> myNames; // a list of own name lists
private String agentName; // the name of the agent that was used to return the result private String agentName; // the name of the agent that was used to return the result
protected RobotsTxtParser(final byte[] robotsTxt, final Set<String> myNames) { protected RobotsTxtParser(final byte[] robotsTxt, final Set<String> myNames) {
this.allowList = new ArrayList<String>(0); this.allowList = new ArrayList<String>(0);
this.denyList = new ArrayList<String>(0); this.denyList = new ArrayList<String>(0);
@ -90,26 +91,26 @@ public final class RobotsTxtParser {
parse(reader); parse(reader);
} }
} }
private void parse(final BufferedReader reader) { private void parse(final BufferedReader reader) {
final ArrayList<String> deny4AllAgents = new ArrayList<String>(); final ArrayList<String> deny4AllAgents = new ArrayList<String>();
final ArrayList<String> deny4ThisAgents = new ArrayList<String>(); final ArrayList<String> deny4ThisAgents = new ArrayList<String>();
final ArrayList<String> allow4AllAgents = new ArrayList<String>(); final ArrayList<String> allow4AllAgents = new ArrayList<String>();
final ArrayList<String> allow4ThisAgents = new ArrayList<String>(); final ArrayList<String> allow4ThisAgents = new ArrayList<String>();
int pos; int pos;
String line = null, lineUpper = null; String line = null, lineUpper = null;
boolean isRule4AllAgents = false, boolean isRule4AllAgents = false,
isRule4ThisAgents = false, isRule4ThisAgents = false,
rule4ThisAgentsFound = false, rule4ThisAgentsFound = false,
inBlock = false; inBlock = false;
try { try {
lineparser: while ((line = reader.readLine()) != null) { lineparser: while ((line = reader.readLine()) != null) {
// replacing all tabs with spaces // replacing all tabs with spaces
line = patternTab.matcher(line).replaceAll(" ").trim(); line = patternTab.matcher(line).replaceAll(" ").trim();
lineUpper = line.toUpperCase(); lineUpper = line.toUpperCase();
// parse empty line // parse empty line
if (line.length() == 0) { if (line.length() == 0) {
// we have reached the end of the rule block // we have reached the end of the rule block
@ -120,26 +121,26 @@ public final class RobotsTxtParser {
} }
continue lineparser; continue lineparser;
} }
// parse comment // parse comment
if (line.startsWith(ROBOTS_COMMENT)) { if (line.startsWith(ROBOTS_COMMENT)) {
// we can ignore this. Just a comment line // we can ignore this. Just a comment line
continue lineparser; continue lineparser;
} }
// parse sitemap; if there are several sitemaps then take the first url // parse sitemap; if there are several sitemaps then take the first url
// TODO: support for multiple sitemaps // TODO: support for multiple sitemaps
if (lineUpper.startsWith(ROBOTS_SITEMAP) && (sitemap == null || sitemap.length() == 0)) { if (lineUpper.startsWith(ROBOTS_SITEMAP) && (this.sitemap == null || this.sitemap.length() == 0)) {
pos = line.indexOf(' '); pos = line.indexOf(' ');
if (pos != -1) { if (pos != -1) {
sitemap = line.substring(pos).trim(); this.sitemap = line.substring(pos).trim();
} }
continue lineparser; continue lineparser;
} }
// parse user agent // parse user agent
if (lineUpper.startsWith(ROBOTS_USER_AGENT)) { if (lineUpper.startsWith(ROBOTS_USER_AGENT)) {
if (inBlock) { if (inBlock) {
// we have detected the start of a new block // we have detected the start of a new block
if (rule4ThisAgentsFound) { if (rule4ThisAgentsFound) {
@ -147,23 +148,23 @@ public final class RobotsTxtParser {
// or global settings which shall not overwrite YaCys settings. // or global settings which shall not overwrite YaCys settings.
break lineparser; break lineparser;
} }
inBlock = false; inBlock = false;
isRule4AllAgents = false; isRule4AllAgents = false;
isRule4ThisAgents = false; isRule4ThisAgents = false;
crawlDelayMillis = 0; // each block has a separate delay this.crawlDelayMillis = 0; // each block has a separate delay
} }
// cutting off comments at the line end // cutting off comments at the line end
pos = line.indexOf(ROBOTS_COMMENT); pos = line.indexOf(ROBOTS_COMMENT);
if (pos != -1) line = line.substring(0,pos).trim(); if (pos != -1) line = line.substring(0,pos).trim();
// getting out the robots name // getting out the robots name
pos = line.indexOf(' '); pos = line.indexOf(' ');
if (pos != -1) { if (pos != -1) {
final String userAgent = line.substring(pos).trim(); final String userAgent = line.substring(pos).trim();
isRule4AllAgents |= userAgent.equals("*"); isRule4AllAgents |= userAgent.equals("*");
for (String agent: this.myNames) { for (final String agent: this.myNames) {
if (userAgent.toLowerCase().equals(agent)) { if (userAgent.toLowerCase().equals(agent)) {
this.agentName = agent; this.agentName = agent;
isRule4ThisAgents = true; isRule4ThisAgents = true;
@ -174,7 +175,7 @@ public final class RobotsTxtParser {
} }
continue lineparser; continue lineparser;
} }
// parse crawl delay // parse crawl delay
if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) { if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) {
inBlock = true; inBlock = true;
@ -183,7 +184,7 @@ public final class RobotsTxtParser {
if (pos != -1) { if (pos != -1) {
try { try {
// the crawl delay can be a float number and means number of seconds // the crawl delay can be a float number and means number of seconds
crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim())); this.crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim()));
} catch (final NumberFormatException e) { } catch (final NumberFormatException e) {
// invalid crawling delay // invalid crawling delay
} }
@ -191,39 +192,39 @@ public final class RobotsTxtParser {
} }
continue lineparser; continue lineparser;
} }
// parse disallow // parse disallow
if (lineUpper.startsWith(ROBOTS_DISALLOW) || lineUpper.startsWith(ROBOTS_ALLOW)) { if (lineUpper.startsWith(ROBOTS_DISALLOW) || lineUpper.startsWith(ROBOTS_ALLOW)) {
inBlock = true; inBlock = true;
final boolean isDisallowRule = lineUpper.startsWith(ROBOTS_DISALLOW); final boolean isDisallowRule = lineUpper.startsWith(ROBOTS_DISALLOW);
if (isRule4ThisAgents || isRule4AllAgents) { if (isRule4ThisAgents || isRule4AllAgents) {
// cutting off comments at the line end // cutting off comments at the line end
pos = line.indexOf(ROBOTS_COMMENT); pos = line.indexOf(ROBOTS_COMMENT);
if (pos != -1) line = line.substring(0,pos).trim(); if (pos != -1) line = line.substring(0,pos).trim();
// cut off tailing * // cut off tailing *
if (line.endsWith("*")) line = line.substring(0,line.length()-1); if (line.endsWith("*")) line = line.substring(0,line.length()-1);
// parse the path // parse the path
pos = line.indexOf(' '); pos = line.indexOf(' ');
if (pos >= 0) { if (pos >= 0) {
// getting the path // getting the path
String path = line.substring(pos).trim(); String path = line.substring(pos).trim();
// unencoding all special charsx // unencoding all special charsx
try { try {
path = URLDecoder.decode(path, "UTF-8"); path = UTF8.decodeURL(path);
} catch (final Exception e) { } catch (final Exception e) {
/* /*
* url decoding failed. E.g. because of * url decoding failed. E.g. because of
* "Incomplete trailing escape (%) pattern" * "Incomplete trailing escape (%) pattern"
*/ */
} }
// escaping all occurences of ; because this char is used as special char in the Robots DB // escaping all occurences of ; because this char is used as special char in the Robots DB
path = RobotsTxt.ROBOTS_DB_PATH_SEPARATOR_MATCHER.matcher(path).replaceAll("%3B"); path = RobotsTxt.ROBOTS_DB_PATH_SEPARATOR_MATCHER.matcher(path).replaceAll("%3B");
// adding it to the pathlist // adding it to the pathlist
if (isDisallowRule) { if (isDisallowRule) {
if (isRule4AllAgents) deny4AllAgents.add(path); if (isRule4AllAgents) deny4AllAgents.add(path);
@ -238,11 +239,11 @@ public final class RobotsTxtParser {
} }
} }
} catch (final IOException e) {} } catch (final IOException e) {}
allowList.addAll(rule4ThisAgentsFound ? allow4ThisAgents : allow4AllAgents); this.allowList.addAll(rule4ThisAgentsFound ? allow4ThisAgents : allow4AllAgents);
denyList.addAll(rule4ThisAgentsFound ? deny4ThisAgents : deny4AllAgents); this.denyList.addAll(rule4ThisAgentsFound ? deny4ThisAgents : deny4AllAgents);
} }
/** /**
* a crawl delay can be assigned to every agent or for all agents * a crawl delay can be assigned to every agent or for all agents
* a special case is where the user agent of this yacy peer is given explicitely * a special case is where the user agent of this yacy peer is given explicitely
@ -253,7 +254,7 @@ public final class RobotsTxtParser {
protected long crawlDelayMillis() { protected long crawlDelayMillis() {
return this.crawlDelayMillis; return this.crawlDelayMillis;
} }
/** /**
* the user agent that was applied to get the crawl properties is recorded * the user agent that was applied to get the crawl properties is recorded
* because it is possible that this robots.txt parser applies to several user agents * because it is possible that this robots.txt parser applies to several user agents
@ -264,15 +265,15 @@ public final class RobotsTxtParser {
protected String agentName() { protected String agentName() {
return this.agentName; return this.agentName;
} }
protected String sitemap() { protected String sitemap() {
return this.sitemap; return this.sitemap;
} }
protected ArrayList<String> allowList() { protected ArrayList<String> allowList() {
return this.allowList; return this.allowList;
} }
protected ArrayList<String> denyList() { protected ArrayList<String> denyList() {
return this.denyList; return this.denyList;
} }

@ -69,13 +69,11 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.lang.ref.SoftReference; import java.lang.ref.SoftReference;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
@ -266,13 +264,7 @@ public final class HTTPDFileHandler {
return; return;
} }
// url decoding of path path = UTF8.decodeURL(path);
try {
path = URLDecoder.decode(path, "UTF-8");
} catch (final UnsupportedEncodingException e) {
// This should never occur
assert(false) : "UnsupportedEncodingException: " + e.getMessage();
}
// check against hack attacks in path // check against hack attacks in path
if (path.indexOf("..") >= 0) { if (path.indexOf("..") >= 0) {
@ -538,8 +530,8 @@ public final class HTTPDFileHandler {
// implement proxy via url (not in servlet, because we need binary access on ouputStream) // implement proxy via url (not in servlet, because we need binary access on ouputStream)
if (path.equals("/proxy.html")) { if (path.equals("/proxy.html")) {
final List<Pattern> urlProxyAccess = Domains.makePatterns(sb.getConfig("proxyURL.access", "127.0.0.1")); final List<Pattern> urlProxyAccess = Domains.makePatterns(sb.getConfig("proxyURL.access", "127.0.0.1"));
UserDB.Entry user = sb.userDB.getUser(requestHeader); final UserDB.Entry user = sb.userDB.getUser(requestHeader);
boolean user_may_see_proxyurl = Domains.matchesList(clientIP, urlProxyAccess) || (user!=null && user.hasRight(UserDB.AccessRight.PROXY_RIGHT)); final boolean user_may_see_proxyurl = Domains.matchesList(clientIP, urlProxyAccess) || (user!=null && user.hasRight(UserDB.AccessRight.PROXY_RIGHT));
if (sb.getConfigBool("proxyURL", false) && user_may_see_proxyurl) { if (sb.getConfigBool("proxyURL", false) && user_may_see_proxyurl) {
doURLProxy(args, conProp, requestHeader, out); doURLProxy(args, conProp, requestHeader, out);
return; return;
@ -1308,7 +1300,7 @@ public final class HTTPDFileHandler {
* not in separete servlet, because we need access to binary outstream * not in separete servlet, because we need access to binary outstream
* @throws IOException * @throws IOException
*/ */
private static void doURLProxy(final serverObjects args, final HashMap<String, Object> conProp, final RequestHeader requestHeader, OutputStream out) throws IOException { private static void doURLProxy(final serverObjects args, final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream out) throws IOException {
final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
URL proxyurl = null; URL proxyurl = null;
@ -1325,7 +1317,7 @@ public final class HTTPDFileHandler {
} }
String host = proxyurl.getHost(); String host = proxyurl.getHost();
if (proxyurl.getPort() != -1) { if (proxyurl.getPort() != -1) {
host += ":" + proxyurl.getPort(); host += ":" + proxyurl.getPort();
} }
// set properties for proxy connection // set properties for proxy connection
@ -1430,7 +1422,7 @@ public final class HTTPDFileHandler {
} else if (url.startsWith("//")) { } else if (url.startsWith("//")) {
// absoulte url but same protocol of form href="//domain.com/path" // absoulte url but same protocol of form href="//domain.com/path"
String complete_url = proxyurl.getProtocol() + ":" + url; final String complete_url = proxyurl.getProtocol() + ":" + url;
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURI(complete_url)) != null) { if (sb.crawlStacker.urlInAcceptedDomain(new DigestURI(complete_url)) != null) {
continue; continue;
@ -1455,7 +1447,7 @@ public final class HTTPDFileHandler {
newurl = newurl.replaceAll("\\$","\\\\\\$"); newurl = newurl.replaceAll("\\$","\\\\\\$");
m.appendReplacement(result, newurl); m.appendReplacement(result, newurl);
} }
catch (MalformedURLException e) {} catch (final MalformedURLException e) {}
} }
} }
@ -1466,7 +1458,7 @@ public final class HTTPDFileHandler {
if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) { if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) {
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader); HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
ChunkedOutputStream cos = new ChunkedOutputStream(out); final ChunkedOutputStream cos = new ChunkedOutputStream(out);
cos.write(sbb); cos.write(sbb);
cos.finish(); cos.finish();

@ -127,6 +127,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
// identify protocol // identify protocol
assert (url != null); assert (url != null);
url = url.trim(); url = url.trim();
url = UTF8.decodeURL(url); // normalization here
//url = patternSpace.matcher(url).replaceAll(" "); //url = patternSpace.matcher(url).replaceAll(" ");
if (url.startsWith("\\\\")) { if (url.startsWith("\\\\")) {
url = "smb://" + patternBackSlash.matcher(url.substring(2)).replaceAll("/"); url = "smb://" + patternBackSlash.matcher(url.substring(2)).replaceAll("/");

@ -154,4 +154,51 @@ public class UTF8 {
return s.getBytes(charset); return s.getBytes(charset);
} }
/**
 * Decodes an <code>application/x-www-form-urlencoded</code> string.
 * <p>
 * Mirrors the behavior of {@link java.net.URLDecoder#decode(String, String)}:
 * {@code '+'} becomes a space, and each run of {@code %XX} escapes is decoded
 * as a byte sequence and turned into characters using the class-level
 * {@code charset} (presumably UTF-8 given this class's name — confirm against
 * the field declaration).
 *
 * @param s the encoded string; must not be {@code null}
 * @return the decoded string; returns {@code s} itself (no copy) when no
 *         {@code '+'} or {@code '%'} was found
 * @throws IllegalArgumentException if a {@code '%'} is not followed by two
 *         valid hexadecimal digits, or the escape pattern is truncated at the
 *         end of the input
 */
public static String decodeURL(final String s) {
    boolean needToChange = false;
    final int numChars = s.length();
    // Decoded output is usually shorter than the input; halve the initial
    // capacity for long inputs to avoid over-allocation.
    final StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
    int i = 0;
    char c;
    byte[] bytes = null;
    while (i < numChars) {
        c = s.charAt(i);
        switch (c) {
        case '+':
            sb.append(' ');
            i++;
            needToChange = true;
            break;
        case '%':
            try {
                // Lazily allocate a scratch buffer large enough for the
                // longest possible remaining run of %XX escapes (3 chars each).
                // Later runs start at a larger i, so this buffer stays big enough.
                if (bytes == null) bytes = new byte[(numChars - i) / 3];
                int pos = 0;
                // Consume a consecutive run of %XX escapes into raw bytes so
                // multi-byte (e.g. UTF-8) sequences decode as one character.
                while (((i + 2) < numChars) && (c == '%')) {
                    final int v = Integer.parseInt(s.substring(i + 1, i + 3), 16);
                    // parseInt accepts a leading '-' (e.g. "%-1"); reject it here.
                    if (v < 0) throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
                    bytes[pos++] = (byte) v;
                    i += 3;
                    if (i < numChars) c = s.charAt(i);
                }
                // A '%' with fewer than two characters after it is malformed.
                if ((i < numChars) && (c == '%')) throw new IllegalArgumentException("URLDecoder: Incomplete trailing escape (%) pattern");
                sb.append(new String(bytes, 0, pos, charset));
            } catch (final NumberFormatException e) {
                throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - " + e.getMessage());
            }
            needToChange = true;
            break;
        default:
            sb.append(c);
            i++;
            break;
        }
    }
    // Avoid allocating a new String when the input contained no escapes.
    return (needToChange ? sb.toString() : s);
}
} }

Loading…
Cancel
Save