|
|
|
@ -1602,8 +1602,8 @@ public final class HTTPDFileHandler {
|
|
|
|
|
if(m.group(8) != null) url = m.group(8);
|
|
|
|
|
if(m.group(10) != null) url = m.group(10);
|
|
|
|
|
if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) {
|
|
|
|
|
String newurl = init + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
String newurl = init + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
m.appendReplacement(result, newurl);
|
|
|
|
|
|
|
|
|
|
} else if (url.startsWith("http")) {
|
|
|
|
@ -1614,32 +1614,37 @@ public final class HTTPDFileHandler {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
theLogger.fine("malformed url for url-rewirte " + url.toString());
|
|
|
|
|
theLogger.fine("malformed url for url-rewirte: " + url);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String newurl = init + "/proxy.html?url=" + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
String newurl = init + "/proxy.html?url=" + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
m.appendReplacement(result, newurl);
|
|
|
|
|
|
|
|
|
|
} else if (url.startsWith("//")) {
|
|
|
|
|
// absolute url with the same protocol, of the form href="//domain.com/path"
|
|
|
|
|
final String complete_url = proxyurl.getProtocol() + ":" + url;
|
|
|
|
|
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
|
|
|
|
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// absolute url with the same protocol, of the form href="//domain.com/path"
|
|
|
|
|
final String complete_url = proxyurl.getProtocol() + ":" + url;
|
|
|
|
|
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
|
|
|
|
try {
|
|
|
|
|
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
theLogger.fine("malformed url for url-rewirte: " + complete_url);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String newurl = init + "/proxy.html?url=" + complete_url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
m.appendReplacement(result, newurl);
|
|
|
|
|
String newurl = init + "/proxy.html?url=" + complete_url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
|
|
|
|
m.appendReplacement(result, newurl);
|
|
|
|
|
|
|
|
|
|
} else if (url.startsWith("/")) {
|
|
|
|
|
// absolute path of form href="/absolute/path/to/linked/page"
|
|
|
|
|
String newurl = init + "/proxy.html?url=http://" + host + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
// absolute path of form href="/absolute/path/to/linked/page"
|
|
|
|
|
String newurl = init + "/proxy.html?url=http://" + host + url;
|
|
|
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
|
|
|
m.appendReplacement(result, newurl);
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|