this.sb.crawlQueues.errorURL.push(request.url(),profile,FailCategory.FINAL_LOAD_CONTEXT,"url in blacklist",-1);
thrownewIOException("CRAWLER Rejecting URL '"+request.url().toString()+"'. URL is in blacklist.");
thrownewIOException("CRAWLER Rejecting URL '"+request.url().toString()+"'. URL is in blacklist.$");
}
// resolve yacy and yacyh domains
@ -141,7 +141,7 @@ public final class HTTPLoader {
if(redirectionUrlString.isEmpty()){
this.sb.crawlQueues.errorURL.push(request.url(),profile,FailCategory.TEMPORARY_NETWORK_FAILURE,"no redirection url provided, field '"+HeaderFramework.LOCATION+"' is empty",statusCode);
thrownewIOException("REJECTED EMTPY REDIRECTION '"+client.getHttpResponse().getStatusLine()+"' for URL " +requestURLString);
thrownewIOException("REJECTED EMTPY REDIRECTION '"+client.getHttpResponse().getStatusLine()+"' for URL '" +requestURLString+"'$");
}
// normalize URL
@ -161,7 +161,7 @@ public final class HTTPLoader {
// if we are already doing a shutdown we don't need to retry crawling
thrownewIOException("REJECTED URL "+request.url()+" because file size '"+contentLength+"' exceeds max filesize limit of "+maxFileSize+" bytes. (GET)");
thrownewIOException("REJECTED URL "+request.url()+" because file size '"+contentLength+"' exceeds max filesize limit of "+maxFileSize+" bytes. (GET)$");
}
// create a new cache entry
@ -202,7 +202,7 @@ public final class HTTPLoader {
}else{
// if the response does not have the right response type, reject the file
this.sb.crawlQueues.errorURL.push(request.url(),profile,FailCategory.TEMPORARY_NETWORK_FAILURE,"wrong http status code",statusCode);
thrownewIOException("REJECTED WRONG STATUS TYPE '"+client.getHttpResponse().getStatusLine()+"' for URL " +requestURLString);
thrownewIOException("REJECTED WRONG STATUS TYPE '"+client.getHttpResponse().getStatusLine()+"' for URL '" +requestURLString+"'$");