// we know that we cannot process that file before loading
log.logInfo("no parser available ("+parserError+") for url = "+request.url().toString());
sb.crawlQueues.errorURL.push(request,this.sb.peers.mySeed().hash,newDate(),1,"no parser available ("+parserError+") for url = "+request.url().toString());
thrownewIOException("no parser available ("+parserError+") for url = "+request.url().toString());
// only the metadata is returned
if(parserError!=null){
log.logInfo("No parser available in SMB crawler: '"+parserError+"' for URL "+request.url().toString()+": parsing only metadata");
}else{
log.logInfo("Too big file in SMB crawler with size = "+size+" Bytes for URL "+request.url().toString()+": parsing only metadata");
}
// check resource size
longsize=url.length();
if(size>maxFileSize&&maxFileSize>=0){
log.logInfo("REJECTED TOO BIG FILE with size "+size+" Bytes for URL "+request.url().toString());
@ -1727,13 +1727,13 @@ public final class Switchboard extends serverSwitch {
if(condenser==null||document.indexingDenied()){
if(this.log.isInfo())log.logInfo("Not Indexed Resource '"+queueEntry.url().toNormalform(false,true)+"': denied by rule in document, process case="+processCase);
addURLtoErrorDB(queueEntry.url(),referrerURL.hash(),queueEntry.initiator(),dc_title,"unknown indexing process case"+processCase);
addURLtoErrorDB(queueEntry.url(),(referrerURL==null)?"":referrerURL.hash(),queueEntry.initiator(),dc_title,"unknown indexing process case"+processCase);
if(this.log.isInfo())log.logInfo("Not Indexed Resource '"+queueEntry.url().toNormalform(false,true)+"': denied by profile rule, process case="+processCase);
addURLtoErrorDB(queueEntry.url(),referrerURL.hash(),queueEntry.initiator(),dc_title,"unknown indexing process case"+processCase);
addURLtoErrorDB(queueEntry.url(),(referrerURL==null)?"":referrerURL.hash(),queueEntry.initiator(),dc_title,"unknown indexing process case"+processCase);
return;
}
@ -1753,7 +1753,7 @@ public final class Switchboard extends serverSwitch {
RSSFeed.channels((queueEntry.initiator().equals(peers.mySeed().hash))?RSSFeed.LOCALINDEXING:RSSFeed.REMOTEINDEXING).addMessage(newRSSMessage("Indexed web page",dc_title,queueEntry.url().toNormalform(true,false)));
}catch(finalIOExceptione){
if(this.log.isFine())log.logFine("Not Indexed Resource '"+queueEntry.url().toNormalform(false,true)+"': process case="+processCase);