better error handling for file loader

pull/1/head
Michael Christen 13 years ago
parent 1d66496362
commit 361146dd7a

@ -266,32 +266,42 @@ public final class LoaderDispatcher {
// load resource from the internet
Response response = null;
if ((protocol.equals("http") || (protocol.equals("https")))) response = this.httpLoader.load(request, maxFileSize, checkBlacklist);
if (protocol.equals("ftp")) response = this.ftpLoader.load(request, true);
if (protocol.equals("smb")) response = this.smbLoader.load(request, true);
if (protocol.equals("file")) response = this.fileLoader.load(request, true);
if (response != null && response.getContent() != null) {
// we got something. Now check if we want to store that to the cache
// first check looks if we want to store the content to the cache
if (crawlProfile == null || !crawlProfile.storeHTCache()) {
// no caching wanted. That's ok, do not write any message
return response;
}
// second check tells us if the protocol tells us something about caching
final String storeError = response.shallStoreCacheForCrawler();
if (storeError == null) {
try {
Cache.store(url, response.getResponseHeader(), response.getContent());
} catch (final IOException e) {
this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
}
} else {
this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
}
if (protocol.equals("http") || protocol.equals("https")) {
response = this.httpLoader.load(request, maxFileSize, checkBlacklist);
} else if (protocol.equals("ftp")) {
response = this.ftpLoader.load(request, true);
} else if (protocol.equals("smb")) {
response = this.smbLoader.load(request, true);
} else if (protocol.equals("file")) {
response = this.fileLoader.load(request, true);
} else {
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
}
if (response == null) {
throw new IOException("no response (NULL) for url " + url);
}
if (response.getContent() == null) {
throw new IOException("empty response (code " + response.getStatus() + ") for url " + url);
}
// we got something. Now check if we want to store that to the cache
// first check looks if we want to store the content to the cache
if (crawlProfile == null || !crawlProfile.storeHTCache()) {
// no caching wanted. That's ok, do not write any message
return response;
}
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
// second check tells us if the protocol tells us something about caching
final String storeError = response.shallStoreCacheForCrawler();
if (storeError == null) {
try {
Cache.store(url, response.getResponseHeader(), response.getContent());
} catch (final IOException e) {
this.log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
}
} else {
this.log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError);
}
return response;
}
private int protocolMaxFileSize(final DigestURI url) {
@ -407,6 +417,7 @@ public final class LoaderDispatcher {
this.cacheStrategy = cacheStrategy;
}
@Override
public void run() {
if (this.cache != null && this.cache.exists()) return;
try {

Loading…
Cancel
Save