|
|
|
@ -266,11 +266,24 @@ public final class LoaderDispatcher {
|
|
|
|
|
|
|
|
|
|
// load resource from the internet
|
|
|
|
|
Response response = null;
|
|
|
|
|
if ((protocol.equals("http") || (protocol.equals("https")))) response = this.httpLoader.load(request, maxFileSize, checkBlacklist);
|
|
|
|
|
if (protocol.equals("ftp")) response = this.ftpLoader.load(request, true);
|
|
|
|
|
if (protocol.equals("smb")) response = this.smbLoader.load(request, true);
|
|
|
|
|
if (protocol.equals("file")) response = this.fileLoader.load(request, true);
|
|
|
|
|
if (response != null && response.getContent() != null) {
|
|
|
|
|
if (protocol.equals("http") || protocol.equals("https")) {
|
|
|
|
|
response = this.httpLoader.load(request, maxFileSize, checkBlacklist);
|
|
|
|
|
} else if (protocol.equals("ftp")) {
|
|
|
|
|
response = this.ftpLoader.load(request, true);
|
|
|
|
|
} else if (protocol.equals("smb")) {
|
|
|
|
|
response = this.smbLoader.load(request, true);
|
|
|
|
|
} else if (protocol.equals("file")) {
|
|
|
|
|
response = this.fileLoader.load(request, true);
|
|
|
|
|
} else {
|
|
|
|
|
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
|
|
|
|
|
}
|
|
|
|
|
if (response == null) {
|
|
|
|
|
throw new IOException("no response (NULL) for url " + url);
|
|
|
|
|
}
|
|
|
|
|
if (response.getContent() == null) {
|
|
|
|
|
throw new IOException("empty response (code " + response.getStatus() + ") for url " + url);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// we got something. Now check if we want to store that to the cache
|
|
|
|
|
// first, check whether we want to store the content to the cache
|
|
|
|
|
if (crawlProfile == null || !crawlProfile.storeHTCache()) {
|
|
|
|
@ -291,9 +304,6 @@ public final class LoaderDispatcher {
|
|
|
|
|
return response;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private int protocolMaxFileSize(final DigestURI url) {
|
|
|
|
|
if (url.isHTTP() || url.isHTTPS())
|
|
|
|
|
return this.sb.getConfigInt("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
|
|
|
|
@ -407,6 +417,7 @@ public final class LoaderDispatcher {
|
|
|
|
|
this.cacheStrategy = cacheStrategy;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public void run() {
|
|
|
|
|
if (this.cache != null && this.cache.exists()) return;
|
|
|
|
|
try {
|
|
|
|
|