borg-0300 19 years ago
parent c3284c27f5
commit 3abd843cdb

@ -160,11 +160,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} else if (url.getProtocol().equals("https")) { } else if (url.getProtocol().equals("https")) {
if (url.getPort() < 0 || url.getPort() == 443) { defaultPort = true; } if (url.getPort() < 0 || url.getPort() == 443) { defaultPort = true; }
} }
String path = url.getFile(); String path = url.getPath();
// (this is different from previous normal forms where a '/' must not appear in root paths; here it must appear. Makes everything easier.) // (this is different from previous normal forms where a '/' must not appear in root paths; here it must appear. Makes everything easier.)
if (path.length() == 0 || path.charAt(0) != '/') path = "/" + path; if (path.length() == 0 || path.charAt(0) != '/') path = "/" + path;
Pattern pathPattern = Pattern.compile("(/[^/\\.]+/)[.]{2}(?=/)|/\\.(?=/)|/(?=/)"); Pattern pathPattern = Pattern.compile("(/[^/\\.]+/)[.]{2}(?=/)|/\\.(?=/)|/(?=/)");
Matcher matcher = pathPattern.matcher(path); Matcher matcher = pathPattern.matcher(path);
while (matcher.find()) { while (matcher.find()) {
@ -172,6 +172,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
matcher.reset(path); matcher.reset(path);
} }
String query = url.getQuery().replaceAll("[\"\\/:*?<>|]", "_");
if (query != null) { path = path.concat("_").concat(query); }
if (defaultPort) return url.getProtocol() + "://" + url.getHost() + path; if (defaultPort) return url.getProtocol() + "://" + url.getHost() + path;
return url.getProtocol() + "://" + url.getHost() + ":" + url.getPort() + path; return url.getProtocol() + "://" + url.getHost() + ":" + url.getPort() + path;
} }

Loading…
Cancel
Save