@ -81,8 +81,22 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
private static final Pattern patternMail = Pattern . compile ( "^[a-z]+:.*?" ) ;
private static final Pattern patternMail = Pattern . compile ( "^[a-z]+:.*?" ) ;
//private static final Pattern patternSpace = Pattern.compile("%20");
//private static final Pattern patternSpace = Pattern.compile("%20");
private final static BitSet UNRESERVED_RFC1738 = new BitSet ( 128 ) ; // register unreserved chars (never escaped in url)
/** Register unreserved chars (never escaped in url) */
private final static BitSet UNRESERVED_PATH = new BitSet ( 128 ) ; // register unreserved chars for path part (not escaped in path)
private final static BitSet UNRESERVED_RFC1738 = new BitSet ( 128 ) ;
/** Register unreserved chars for path part (not escaped in path) */
private final static BitSet UNRESERVED_PATH = new BitSet ( 128 ) ;
/ * *
* Register regular expressions metacharacters used by the { @link Pattern }
* class .
*
* @see < a href =
* "https://docs.oracle.com/javase/tutorial/essential/regex/literals.html" > Regular
* expressions string literals documentation < / a >
* /
private static final BitSet PATTERN_METACHARACTERS = new BitSet ( 128 ) ;
static {
static {
// unreserved characters (chars not to escape in url)
// unreserved characters (chars not to escape in url)
for ( int i = 'A' ; i < = 'Z' ; i + + ) { // hialpha RFC1738 Section 5
for ( int i = 'A' ; i < = 'Z' ; i + + ) { // hialpha RFC1738 Section 5
@ -119,6 +133,27 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
UNRESERVED_PATH . set ( '@' ) ;
UNRESERVED_PATH . set ( '@' ) ;
UNRESERVED_PATH . set ( '&' ) ;
UNRESERVED_PATH . set ( '&' ) ;
UNRESERVED_PATH . set ( '=' ) ;
UNRESERVED_PATH . set ( '=' ) ;
/* Pattern metacharacters : <([{\^-=$!|]})?*+.> */
PATTERN_METACHARACTERS . set ( '<' ) ;
PATTERN_METACHARACTERS . set ( '(' ) ;
PATTERN_METACHARACTERS . set ( '[' ) ;
PATTERN_METACHARACTERS . set ( '{' ) ;
PATTERN_METACHARACTERS . set ( '\\' ) ;
PATTERN_METACHARACTERS . set ( '^' ) ;
PATTERN_METACHARACTERS . set ( '-' ) ;
PATTERN_METACHARACTERS . set ( '=' ) ;
PATTERN_METACHARACTERS . set ( '$' ) ;
PATTERN_METACHARACTERS . set ( '!' ) ;
PATTERN_METACHARACTERS . set ( '|' ) ;
PATTERN_METACHARACTERS . set ( ']' ) ;
PATTERN_METACHARACTERS . set ( '}' ) ;
PATTERN_METACHARACTERS . set ( ')' ) ;
PATTERN_METACHARACTERS . set ( '?' ) ;
PATTERN_METACHARACTERS . set ( '*' ) ;
PATTERN_METACHARACTERS . set ( '+' ) ;
PATTERN_METACHARACTERS . set ( '.' ) ;
PATTERN_METACHARACTERS . set ( '>' ) ;
}
}
// session id handling
// session id handling
@ -552,23 +587,101 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
* < / ul >
* < / ul >
* /
* /
private void escape ( ) {
private void escape ( ) {
if ( this . path ! = null & & this . path . indexOf ( '%' ) = = - 1 ) escapePath ( ) ;
if ( this . path ! = null & & this . path . indexOf ( '%' ) = = - 1 ) {
this . path = escapePath ( this . path ) ;
}
if ( this . searchpart ! = null & & this . searchpart . indexOf ( '%' ) = = - 1 ) escapeSearchpart ( ) ;
if ( this . searchpart ! = null & & this . searchpart . indexOf ( '%' ) = = - 1 ) escapeSearchpart ( ) ;
if ( this . anchor ! = null ) this . anchor = escape ( this . anchor ) . toString ( ) ;
if ( this . anchor ! = null ) this . anchor = escape ( this . anchor ) . toString ( ) ;
}
}
/ * *
/ * *
* Url encode / escape the path part according to the allowed characters
* < p > Percent - encode / escape an URL path part according to the allowed characters
* ( RFC1738 & RFC2396 )
* ( see RFC3986 , and formerly RFC1738 & RFC2396 ) . Uses UTF - 8 character codes for
* uses UTF - 8 character codes for non - ASCII
* non - ASCII . < / p >
* < p > Important : already percent - encoded characters are not re - encoded < / p >
*
* @param pathToEscape the path part to escape .
* @return an escaped path with only ASCII characters , or null when pathToEscape
* is null .
* @see < a href = "https://tools.ietf.org/html/rfc3986#section-2.1" > RFC3986
* percent - encoding section < / a >
* @see <a href="https://tools.ietf.org/html/rfc3986#appendix-A">RFC3986 path
* definition</a>
* /
public static String escapePath(final String pathToEscape) {
    /* Plain path : the string is not processed as a regular expression */
    final boolean isPattern = false;
    return escapePath(pathToEscape, isPattern);
}
/ * *
* < p > Percent - encode / escape an URL path regular expression according to the allowed
* characters in an URL path ( see RFC3986 ) and in the { @link Pattern } regular
* expressions . Uses UTF - 8 character codes for non - ASCII . < / p >
* < p > Important : already percent - encoded characters are not re - encoded < / p >
*
* @param pathPattern the URL path regular expression to escape .
* @return an escaped path regular expression with only allowed ASCII
* characters , or null when pathPattern is null .
* @see < a href = "https://tools.ietf.org/html/rfc3986#section-2.1" > RFC3986
* percent - encoding section < / a >
* @see <a href="https://tools.ietf.org/html/rfc3986#appendix-A">RFC3986 path
* definition</a>
* /
public static String escapePathPattern(final String pathPattern) {
    /* Regular expression mode : Pattern metacharacters are left unencoded */
    final boolean isPattern = true;
    return escapePath(pathPattern, isPattern);
}
/ * *
* < p >
* Percent - encode / escape an URL path part according to the allowed characters
* specified in RFC3986 ( formerly RFC1738 and RFC2396 ) . Uses UTF - 8 character
* codes for non - ASCII .
* < / p >
* < p >
* When isPattern is true , the string is processed as a regular expression , and
* therefore meta - characters used by the { @link Pattern } class are not
* percent - encoded .
* < / p >
*
* @param pathToEscape the path part to escape .
* @param isPattern when true, regular expression meta-characters are not escaped
* @return an escaped path (a path regular expression when isPattern is true)
* with only allowed ASCII characters, or null when pathToEscape is null.
* @see < a href = "https://tools.ietf.org/html/rfc3986#section-2.1" > RFC3986
* percent - encoding section < / a >
* @see <a href="https://tools.ietf.org/html/rfc3986#appendix-A">RFC3986 path
* definition</a>
* /
* /
private void escapePath ( ) {
private static String escapePath ( final String pathToEscape , final boolean isPattern ) {
final StringBuilder ptmp = new StringBuilder ( this . path . length ( ) + 10 ) ;
if ( pathToEscape = = null ) {
return pathToEscape ;
}
final StringBuilder ptmp = new StringBuilder ( pathToEscape . length ( ) + 10 ) ;
boolean modified = false ;
boolean modified = false ;
final int len = this . path . length ( ) ;
final int len = pathToEscape . length ( ) ;
for ( int i = 0 ; i < len ; i + + ) {
int i = 0 ;
int ch = this . path . charAt ( i ) ;
while ( i < len ) {
if ( ch < = 0x7F ) {
int ch = pathToEscape . charAt ( i ) ;
if ( ch = = '%' & & ( i + 2 ) < len ) {
final char digit1 = pathToEscape . charAt ( i + 1 ) ;
final char digit2 = pathToEscape . charAt ( i + 2 ) ;
if ( isHexDigit ( digit1 ) & & isHexDigit ( digit2 ) ) {
/* Already percent-encoded character */
ptmp . append ( ( char ) ch ) ;
/* Normalize hexadecimal digits to upper case */
if ( Character . isLowerCase ( digit1 ) | | Character . isLowerCase ( digit2 ) ) {
modified = true ;
}
ptmp . append ( Character . toUpperCase ( digit1 ) ) ;
ptmp . append ( Character . toUpperCase ( digit2 ) ) ;
i + = 2 ;
} else {
/* Not a valid percent-encoded character : we encode it now */
ptmp . append ( hex [ ch ] ) ;
modified = true ;
}
} else if ( isPattern & & PATTERN_METACHARACTERS . get ( ch ) ) {
ptmp . append ( ( char ) ch ) ;
} else if ( ch < = 0x7F ) {
if ( UNRESERVED_PATH . get ( ch ) ) {
if ( UNRESERVED_PATH . get ( ch ) ) {
ptmp . append ( ( char ) ch ) ;
ptmp . append ( ( char ) ch ) ;
} else {
} else {
@ -585,10 +698,22 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
ptmp . append ( hex [ 0x80 | ( ch & 0x3F ) ] ) ;
ptmp . append ( hex [ 0x80 | ( ch & 0x3F ) ] ) ;
modified = true ;
modified = true ;
}
}
i + + ;
}
}
if ( modified ) {
if ( modified ) {
this . path = ptmp . toString ( ) ;
return ptmp . toString ( ) ;
}
}
return pathToEscape ;
}
/ * *
* @param character a character to test
* @return true when the character is a valid hexadecimal digit
* /
private static boolean isHexDigit ( final int character ) {
return ( character > = '0' & & character < = '9' ) | | ( character > = 'a' & & character < = 'f' )
| | ( character > = 'A' & & character < = 'F' ) ;
}
}
private void escapeSearchpart ( ) {
private void escapeSearchpart ( ) {