From 26366596d9b2638786cbfdfba66c5ecb106cea45 Mon Sep 17 00:00:00 2001
From: orbiter <mc@yacy.net>
Date: Wed, 4 Sep 2013 16:00:47 +0200
Subject: [PATCH] fix for a problem which occurs when a site is crawled where
 the start url is redirected.

---
 source/net/yacy/crawler/data/ZURL.java            | 3 +--
 source/net/yacy/crawler/retrieval/HTTPLoader.java | 6 ------
 source/net/yacy/search/Switchboard.java           | 3 +++
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java
index 9a578ef48..6970819fd 100644
--- a/source/net/yacy/crawler/data/ZURL.java
+++ b/source/net/yacy/crawler/data/ZURL.java
@@ -136,8 +136,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
         if (hash == null) return false;
         //System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " remove " + hash);
         try {
-            this.urlIndex.delete(hash);
-            return true;
+            return this.urlIndex.delete(hash);
         } catch (final IOException e) {
             return false;
         }
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index 8dd8a89e8..09ac43511 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -170,12 +170,6 @@ public final class HTTPLoader {
                     throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                 }
 
-                // check if the url was already loaded
-                if (Cache.has(redirectionUrl.hash())) { // customer request
-                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
-                    throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
-                }
-
                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
                 return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 07689f518..aa1838676 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2901,6 +2901,9 @@ public final class Switchboard extends serverSwitch {
                 return "problem crawling an ftp site: " + e.getMessage();
             }
         }
+        
+        // remove the document from the error-db
+        this.crawlQueues.urlRemove(urlhash);
 
         // get a scraper to get the title
         Document scraper;