diff --git a/source/de/anomic/crawler/FTPLoader.java b/source/de/anomic/crawler/FTPLoader.java index 48c09073a..5d8d715bb 100644 --- a/source/de/anomic/crawler/FTPLoader.java +++ b/source/de/anomic/crawler/FTPLoader.java @@ -28,6 +28,7 @@ package de.anomic.crawler; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.PrintStream; import java.util.Date; @@ -74,7 +75,7 @@ public class FTPLoader { * @param entry * @return */ - public indexDocumentMetadata load(final CrawlEntry entry) { + public indexDocumentMetadata load(final CrawlEntry entry) throws IOException { final yacyURL entryUrl = entry.url(); final String fullPath = getPath(entryUrl); @@ -133,11 +134,6 @@ public class FTPLoader { (new PrintStream(berr)).print(e.getMessage()); } } - /* - } finally { - closeConnection(ftpClient); - } - */ closeConnection(ftpClient); } @@ -145,8 +141,8 @@ public class FTPLoader { if (berr.size() > 0 || htCache == null) { // some error logging final String detail = (berr.size() > 0) ? "\n Errorlog: " + berr.toString() : ""; - log.logWarning("Unable to download URL " + entry.url().toString() + detail); sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "server download" + detail); + throw new IOException("FTPLoader: Unable to download URL " + entry.url().toString() + detail); } return htCache; diff --git a/source/de/anomic/crawler/HTTPLoader.java b/source/de/anomic/crawler/HTTPLoader.java index e8483a1cc..e21904320 100644 --- a/source/de/anomic/crawler/HTTPLoader.java +++ b/source/de/anomic/crawler/HTTPLoader.java @@ -177,9 +177,8 @@ public final class HTTPLoader { htCache.setCacheArray(responseBody); } else { // if the response has not the right file type then reject file - this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString()); sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension"); - htCache = null; + throw new IOException("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString()); } return htCache; /* @@ -233,10 +232,8 @@ public final class HTTPLoader { } } else { // if the response has not the right response type then reject file - this.log.logInfo("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString()); - - // not processed any further sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong http status code " + res.getStatusCode() + ")"); + throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString()); } /* } finally {