@ -103,13 +102,13 @@ public final class CrawlStacker {
this.acceptGlobalURLs=acceptGlobalURLs;
this.domainList=domainList;
this.requestQueue=newWorkflowProcessor<Request>("CrawlStacker","This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)",newString[]{"Balancer"},this,"job",10000,null,WorkflowProcessor.availableCPU);
if(warning!=null&&this.log.isFine())this.log.fine("CrawlStacker.stackCrawl of URL "+entry.url().toNormalform(true)+" - not pushed: "+warning);
if(warning!=null&&CrawlStacker.log.isFine())CrawlStacker.log.fine("CrawlStacker.stackCrawl of URL "+entry.url().toNormalform(true)+" - not pushed: "+warning);
returnnull;
}
@ -405,8 +404,8 @@ public final class CrawlStacker {
if(this.log.isInfo())this.log.info("URL '"+urlstring+"' is double registered in '"+dbocc.toString()+"', previous cause: "+errorEntry.getFailReason());
if(CrawlStacker.log.isInfo())CrawlStacker.log.info("URL '"+urlstring+"' is double registered in '"+dbocc.toString()+"', previous cause: "+errorEntry.getFailReason());
if(this.log.isFine())this.log.fine("URL '"+urlstring+"' appeared too often in crawl stack, a maximum of "+maxAllowedPagesPerDomain+" is allowed.");
if(CrawlStacker.log.isFine())CrawlStacker.log.fine("URL '"+urlstring+"' appeared too often in crawl stack, a maximum of "+maxAllowedPagesPerDomain+" is allowed.");
return"crawl stack domain counter exceeded (test by profile)";
}
@ -455,44 +454,44 @@ public final class CrawlStacker {
if(this.log.isFine())this.log.fine("URL '"+urlstring+"' does not match must-match crawling filter '"+profile.urlMustMatchPattern().toString()+"'.");
if(CrawlStacker.log.isFine())CrawlStacker.log.fine("URL '"+urlstring+"' does not match must-match crawling filter '"+profile.urlMustMatchPattern().toString()+"'.");
if(this.log.isFine())this.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' does not match must-match crawling filter '"+profile.ipMustMatchPattern().toString()+"'.");
if(CrawlStacker.log.isFine())CrawlStacker.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' does not match must-match crawling filter '"+profile.ipMustMatchPattern().toString()+"'.");
return"ip "+url.getInetAddress().getHostAddress()+" of url does not match must-match filter";
if(this.log.isFine())this.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' matches must-not-match crawling filter '"+profile.ipMustNotMatchPattern().toString()+"'.");
if(CrawlStacker.log.isFine())CrawlStacker.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' matches must-not-match crawling filter '"+profile.ipMustNotMatchPattern().toString()+"'.");
return"ip "+url.getInetAddress().getHostAddress()+" of url matches must-not-match filter";
}
@ -525,7 +524,7 @@ public final class CrawlStacker {
}
}
if(!granted){
if(this.log.isFine())this.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' does not match must-match crawling filter '"+profile.ipMustMatchPattern().toString()+"'.");
if(CrawlStacker.log.isFine())CrawlStacker.log.fine("IP "+url.getInetAddress().getHostAddress()+" of URL '"+urlstring+"' does not match must-match crawling filter '"+profile.ipMustMatchPattern().toString()+"'.");
return"country "+c0+" of url does not match must-match filter for countries";
this.log.fine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed ("+"cacheLoader="+this.workers.size()+"), httpClients = "+ConnectionInfo.getCount());
if(CrawlQueues.log.isFine()){
CrawlQueues.log.fine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed ("+"cacheLoader="+this.workers.size()+"), httpClients = "+ConnectionInfo.getCount());
}
returnfalse;
}
finalStringcautionCause=this.sb.onlineCaution();
if(cautionCause!=null){
if(this.log.isFine()){
this.log.fine("remoteCrawlLoaderJob: online caution for "+cautionCause+", omitting processing");
if(CrawlQueues.log.isFine()){
CrawlQueues.log.fine("remoteCrawlLoaderJob: online caution for "+cautionCause+", omitting processing");
}
returnfalse;
}
if(remoteTriggeredCrawlJobSize()>200){
if(this.log.isFine()){
this.log.fine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
if(CrawlQueues.log.isFine()){
CrawlQueues.log.fine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");