So far we used the same escape procedure for all parts of the URL (which includes x-www-form-urlencoded escaping for all URL components).
Added the capability to use different encoding rules for the different URL components (through a specific bitset for each component).
(This is inspired by the org.apache.http.client and java.net.URI implementations.)
- Added test case for http://mantis.tokeek.de/view.php?id=559
pull/1/head
reger 10 years ago
parent 62087fb8b2
commit 1d81bd0687

@ -35,6 +35,7 @@ import java.io.InputStream;
import java.io.Serializable; import java.io.Serializable;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.BitSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.Locale; import java.util.Locale;
@ -74,6 +75,46 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
private static final Pattern patternMail = Pattern.compile("^[a-z]+:.*?"); private static final Pattern patternMail = Pattern.compile("^[a-z]+:.*?");
//private static final Pattern patternSpace = Pattern.compile("%20"); //private static final Pattern patternSpace = Pattern.compile("%20");
/** Characters that are never escaped anywhere in a url (unreserved set). */
private final static BitSet UNRESERVED_RFC1738 = new BitSet(256);
/** Characters that are not escaped inside the path part of a url. */
private final static BitSet UNRESERVED_PATH = new BitSet(256);
static {
    // alphanumerics: hialpha, lowalpha and digit (RFC1738 Section 5)
    for (char c = 'A'; c <= 'Z'; c++) {
        UNRESERVED_RFC1738.set(c);
    }
    for (char c = 'a'; c <= 'z'; c++) {
        UNRESERVED_RFC1738.set(c);
    }
    for (char c = '0'; c <= '9'; c++) {
        UNRESERVED_RFC1738.set(c);
    }

    // special char set $-_.+!*'(), (RFC1738 Section 2.2, safe/extra chars of Section 5)
    // plus '~'; apart from '$', '+' and ',' these are also "mark" chars of RFC2396 Section 2.2
    for (final char c : "$-_.+!*'(),~".toCharArray()) {
        UNRESERVED_RFC1738.set(c);
    }

    // the path keeps everything that is unreserved in general ...
    UNRESERVED_PATH.or(UNRESERVED_RFC1738);
    // ... and additionally '/' (hpath segment separator, RFC1738 Section 5),
    // ';' (hsegment param separator, FTP) and the chars ':', '@', '&', '='
    for (final char c : "/;:@&=".toCharArray()) {
        UNRESERVED_PATH.set(c);
    }
}
// session id handling // session id handling
private static final Object PRESENT = new Object(); private static final Object PRESENT = new Object();
private static final ConcurrentHashMap<String, Object> sessionIDnames = new ConcurrentHashMap<String, Object>(); private static final ConcurrentHashMap<String, Object> sessionIDnames = new ConcurrentHashMap<String, Object>();
@ -440,14 +481,25 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (this.anchor != null) this.anchor = escape(this.anchor).toString(); if (this.anchor != null) this.anchor = escape(this.anchor).toString();
} }
/**
* Url encode/escape the path part according to the allowed characters
* (RFC1738 & RFC2396)
*/
private void escapePath() { private void escapePath() {
final String[] pathp = CommonPattern.SLASH.split(this.path, -1);
final StringBuilder ptmp = new StringBuilder(this.path.length() + 10); final StringBuilder ptmp = new StringBuilder(this.path.length() + 10);
for (final String element : pathp) { final byte[] bpath = UTF8.getBytes(this.path);
ptmp.append('/'); boolean modified = false;
ptmp.append(escape(element)); for (byte b : bpath) {
if (UNRESERVED_PATH.get(b)) {
ptmp.append((char) b);
} else {
ptmp.append(hex[b]);
modified = true;
}
}
if (modified) {
this.path = ptmp.toString();
} }
this.path = ptmp.substring((ptmp.length() > 0) ? 1 : 0);
} }
private void escapeSearchpart() { private void escapeSearchpart() {

@ -151,7 +151,8 @@ public class MultiProtocolURLTest {
String[][] testStrings = new String[][]{ String[][] testStrings = new String[][]{
// teststring , expectedresult // teststring , expectedresult
new String[]{"http://www.heise.de/newsticker/thema/%23saukontrovers", "http://www.heise.de/newsticker/thema/%23saukontrovers"}, // http://mantis.tokeek.de/view.php?id=519 new String[]{"http://www.heise.de/newsticker/thema/%23saukontrovers", "http://www.heise.de/newsticker/thema/%23saukontrovers"}, // http://mantis.tokeek.de/view.php?id=519
new String[]{"http://www.heise.de/newsticker/thema/#saukontrovers", "http://www.heise.de/newsticker/thema/"} new String[]{"http://www.heise.de/newsticker/thema/#saukontrovers", "http://www.heise.de/newsticker/thema/"},
new String[]{"http://www.liferay.com/community/wiki/-/wiki/Main/Wiki+Portlet", "http://www.liferay.com/community/wiki/-/wiki/Main/Wiki+Portlet"} // http://mantis.tokeek.de/view.php?id=559
}; };
for (String[] testString : testStrings) { for (String[] testString : testStrings) {
@ -159,10 +160,10 @@ public class MultiProtocolURLTest {
System.out.print("orig uri: " + testString[0]); System.out.print("orig uri: " + testString[0]);
String shouldBe = testString[1]; String shouldBe = testString[1];
// conversion result // conversion result
String resolvedHost = new MultiProtocolURL(testString[0]).toNormalform(true); String resultUrl = new MultiProtocolURL(testString[0]).toNormalform(true);
// test if equal // test if equal
assertEquals(shouldBe, resolvedHost); assertEquals(shouldBe, resultUrl);
System.out.println(" -> " + resolvedHost); System.out.println(" -> " + resultUrl);
} }
} }
} }

Loading…
Cancel
Save