diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 371bedd81..4e118eb6c 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -53,7 +53,6 @@ import java.util.Properties;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
import de.anomic.server.logging.serverLog;
import de.anomic.server.serverByteBuffer;
@@ -150,10 +149,10 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
return us;
}
*/
-
+
public static String urlNormalform(URL url) {
boolean defaultPort = false;
- //serverLog.logFinest("htmlFilter", "urlNormalform: '" + url.toString() + "'");
+ // serverLog.logFinest("htmlFilter", "urlNormalform: '" + url.toString() + "'");
if (url.getProtocol().equals("http")) {
if (url.getPort() < 0 || url.getPort() == 80) { defaultPort = true; }
} else if (url.getProtocol().equals("ftp")) {
@@ -162,25 +161,23 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (url.getPort() < 0 || url.getPort() == 443) { defaultPort = true; }
}
String path = url.getFile();
- if ((path.length() == 0) || (path.charAt(0) != '/')) path = "/" + path;
+
// (this is different from previous normal forms where a '/' must not appear in root paths; here it must appear. Makes everything easier.)
- int cpos = path.indexOf("#");
- if (cpos >= 0) path = path.substring(0, cpos);
-
- Pattern pathPattern = Pattern.compile("(/[^/\\.]+/)(?