diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index f46b3cfae..7889df481 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -47,6 +47,7 @@ package de.anomic.plasma.crawler; +import java.io.File; import java.io.IOException; import de.anomic.index.indexURL; @@ -277,5 +278,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW // push it onto the stack this.sb.urlPool.errorURL.stackPushEntry(ee); + + // delete the cache file + File cacheFile = this.cacheManager.getCachePath(this.url); + if (cacheFile.exists()) cacheFile.delete(); } } diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index 2a27aaddc..54c1a8a60 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -454,6 +454,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { return load(crawlingRetryCount - 1); } if (failreason != null) { + // add url into error db addURLtoErrorDB(failreason); } return null; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index bee903ef9..44e6c8826 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1749,6 +1749,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } document = null; + } catch (Exception e) { + this.log.logSevere("Unexpected exception while parsing/indexing URL ",e); + } catch (Error e) { + this.log.logSevere("Unexpected exception while parsing/indexing URL ",e); } finally { checkInterruption();