diff --git a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java
index 9a578ef48..6970819fd 100644
--- a/source/net/yacy/crawler/data/ZURL.java
+++ b/source/net/yacy/crawler/data/ZURL.java
@@ -136,8 +136,7 @@ public class ZURL implements Iterable {
         if (hash == null) return false;
         //System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " remove " + hash);
         try {
-            this.urlIndex.delete(hash);
-            return true;
+            return this.urlIndex.delete(hash);
         } catch (final IOException e) {
             return false;
         }
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index 8dd8a89e8..09ac43511 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -170,12 +170,6 @@ public final class HTTPLoader {
                 throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
             }
 
-            // check if the url was already loaded
-            if (Cache.has(redirectionUrl.hash())) { // customer request
-                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
-                throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
-            }
-
             // retry crawling with new url
             request.redirectURL(redirectionUrl);
             return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 07689f518..aa1838676 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2901,6 +2901,9 @@ public final class Switchboard extends serverSwitch {
                 return "problem crawling an ftp site: " + e.getMessage();
             }
         }
+
+        // remove the document from the error-db
+        this.crawlQueues.urlRemove(urlhash);
 
         // get a scraper to get the title
         Document scraper;