From 8eb4181737dbb0fc8fe93debfc2ab96f4a93469e Mon Sep 17 00:00:00 2001 From: borg-0300 Date: Wed, 28 Dec 2005 10:32:21 +0000 Subject: [PATCH] BUGFIX for regular expression git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1261 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/htmlFilter/htmlFilterContentScraper.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index ffd66871e..0e9e4953e 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -164,7 +164,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen // (this is different from previous normal forms where a '/' must not appear in root paths; here it must appear. Makes everything easier.) if (path.length() == 0 || path.charAt(0) != '/') path = "/" + path; - + Pattern pathPattern = Pattern.compile("(/[^/\\.]+/)[.]{2}(?=/)|/\\.(?=/)|/(?=/)"); Matcher matcher = pathPattern.matcher(path); while (matcher.find()) { @@ -172,6 +172,10 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen matcher.reset(path); } + while (path.startsWith("/../")) { + path = path.substring(3); + } + if (defaultPort) return url.getProtocol() + "://" + url.getHost() + path; return url.getProtocol() + "://" + url.getHost() + ":" + url.getPort() + path; }