From a0ddf2ec115aa223adbe21ce702f8af43b151017 Mon Sep 17 00:00:00 2001 From: theli Date: Tue, 12 Sep 2006 04:50:12 +0000 Subject: [PATCH] *) AbstractCrawlWorker.java: delete already downloaded data on crawling error *) plasmaSwitchboard.java: log unexpected errors while parsing/indexing git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2552 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/crawler/AbstractCrawlWorker.java | 5 +++++ source/de/anomic/plasma/crawler/http/CrawlWorker.java | 1 + source/de/anomic/plasma/plasmaSwitchboard.java | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index f46b3cfae..7889df481 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -47,6 +47,7 @@ package de.anomic.plasma.crawler; +import java.io.File; import java.io.IOException; import de.anomic.index.indexURL; @@ -277,5 +278,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW // push it onto the stack this.sb.urlPool.errorURL.stackPushEntry(ee); + + // delete the cache file + File cacheFile = this.cacheManager.getCachePath(this.url); + if (cacheFile.exists()) cacheFile.delete(); } } diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index 2a27aaddc..54c1a8a60 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -454,6 +454,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { return load(crawlingRetryCount - 1); } if (failreason != null) { + // add url into error db addURLtoErrorDB(failreason); } return null; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index bee903ef9..44e6c8826 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1749,6 +1749,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } document = null; + } catch (Exception e) { + this.log.logSevere("Unexpected exception while parsing/indexing URL ",e); + } catch (Error e) { + this.log.logSevere("Unexpected exception while parsing/indexing URL ",e); } finally { checkInterruption();