added more exception handling during crawling

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5357 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 10f5ec1040
commit 1918a0173e

@ -28,6 +28,7 @@
package de.anomic.crawler;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Date;
@ -74,7 +75,7 @@ public class FTPLoader {
* @param entry
* @return
*/
public indexDocumentMetadata load(final CrawlEntry entry) {
public indexDocumentMetadata load(final CrawlEntry entry) throws IOException {
final yacyURL entryUrl = entry.url();
final String fullPath = getPath(entryUrl);
@ -133,11 +134,6 @@ public class FTPLoader {
(new PrintStream(berr)).print(e.getMessage());
}
}
/*
} finally {
closeConnection(ftpClient);
}
*/
closeConnection(ftpClient);
}
@ -145,8 +141,8 @@ public class FTPLoader {
if (berr.size() > 0 || htCache == null) {
// some error logging
final String detail = (berr.size() > 0) ? "\n Errorlog: " + berr.toString() : "";
log.logWarning("Unable to download URL " + entry.url().toString() + detail);
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "server download" + detail);
throw new IOException("FTPLoader: Unable to download URL " + entry.url().toString() + detail);
}
return htCache;

@ -177,9 +177,8 @@ public final class HTTPLoader {
htCache.setCacheArray(responseBody);
} else {
// if the response does not have the right file type, reject the file
this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension");
htCache = null;
throw new IOException("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
}
return htCache;
/*
@ -233,10 +232,8 @@ public final class HTTPLoader {
}
} else {
// if the response does not have the right response type, reject the file
this.log.logInfo("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString());
// not processed any further
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong http status code " + res.getStatusCode() + ")");
throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString());
}
/*
} finally {

Loading…
Cancel
Save