|
|
|
@ -188,7 +188,7 @@ public final class LoaderDispatcher {
|
|
|
|
|
final String protocol = url.getProtocol();
|
|
|
|
|
final String host = url.getHost();
|
|
|
|
|
|
|
|
|
|
// check if url is in blacklist
|
|
|
|
|
// check if url is in blacklist
|
|
|
|
|
if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
|
|
|
|
|
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
|
|
|
|
|
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
|
|
|
|
@ -352,7 +352,7 @@ public final class LoaderDispatcher {
|
|
|
|
|
public Document loadDocument(final DigestURI location, final CacheStrategy cachePolicy) throws IOException {
|
|
|
|
|
// load resource
|
|
|
|
|
Request request = request(location, true, false);
|
|
|
|
|
final Response response = this.load(request, cachePolicy, 10000, true);
|
|
|
|
|
final Response response = this.load(request, cachePolicy, true);
|
|
|
|
|
final DigestURI url = request.url();
|
|
|
|
|
if (response == null) throw new IOException("no Response for url " + url);
|
|
|
|
|
|
|
|
|
|