From 1d81bd0687674c31482ccd52ba2d291b3cb9346e Mon Sep 17 00:00:00 2001 From: reger Date: Sun, 15 Mar 2015 00:46:07 +0100 Subject: [PATCH] fix url encoding for path see http://mantis.tokeek.de/view.php?id=559 So far we used the same escape procedure for all parts of the URL (which includes x-www-form-urlencoded escaping for all URL components). Added the capability to use different encoding rules for the different URL components (through a specific BitSet for each component). (This is inspired by the org.apache.http.client and java.net.URI implementations.) - Added test case for http://mantis.tokeek.de/view.php?id=559 --- .../cora/document/id/MultiProtocolURL.java | 62 +++++++++++++++++-- .../document/id/MultiProtocolURLTest.java | 9 +-- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 4bfc7bb8a..0875aa66f 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -35,6 +35,7 @@ import java.io.InputStream; import java.io.Serializable; import java.net.InetAddress; import java.net.MalformedURLException; +import java.util.BitSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.Locale; @@ -74,6 +75,46 @@ public class MultiProtocolURL implements Serializable, Comparable 0) ? 
1 : 0); } private void escapeSearchpart() { diff --git a/test/net/yacy/cora/document/id/MultiProtocolURLTest.java b/test/net/yacy/cora/document/id/MultiProtocolURLTest.java index e3c7f5a2d..c29b9700b 100644 --- a/test/net/yacy/cora/document/id/MultiProtocolURLTest.java +++ b/test/net/yacy/cora/document/id/MultiProtocolURLTest.java @@ -151,7 +151,8 @@ public class MultiProtocolURLTest { String[][] testStrings = new String[][]{ // teststring , expectedresult new String[]{"http://www.heise.de/newsticker/thema/%23saukontrovers", "http://www.heise.de/newsticker/thema/%23saukontrovers"}, // http://mantis.tokeek.de/view.php?id=519 - new String[]{"http://www.heise.de/newsticker/thema/#saukontrovers", "http://www.heise.de/newsticker/thema/"} + new String[]{"http://www.heise.de/newsticker/thema/#saukontrovers", "http://www.heise.de/newsticker/thema/"}, + new String[]{"http://www.liferay.com/community/wiki/-/wiki/Main/Wiki+Portlet", "http://www.liferay.com/community/wiki/-/wiki/Main/Wiki+Portlet"} // http://mantis.tokeek.de/view.php?id=559 }; for (String[] testString : testStrings) { @@ -159,10 +160,10 @@ public class MultiProtocolURLTest { System.out.print("orig uri: " + testString[0]); String shouldBe = testString[1]; // conversion result - String resolvedHost = new MultiProtocolURL(testString[0]).toNormalform(true); + String resultUrl = new MultiProtocolURL(testString[0]).toNormalform(true); // test if equal - assertEquals(shouldBe, resolvedHost); - System.out.println(" -> " + resolvedHost); + assertEquals(shouldBe, resultUrl); + System.out.println(" -> " + resultUrl); } } }