set more loggers to 'final static'

pull/1/head
Michael Peter Christen 11 years ago
parent c60947360d
commit 1a4a69c226

@ -37,9 +37,10 @@ import net.yacy.server.serverSwitch;
public class termlist_p { public class termlist_p {
private final static ConcurrentLog log = new ConcurrentLog("TERMLIST");
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final ConcurrentLog log = new ConcurrentLog("TERMLIST");
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
Segment segment = sb.index; Segment segment = sb.index;

@ -42,7 +42,7 @@ import net.yacy.cora.util.ConcurrentLog;
*/ */
public class SynonymLibrary { public class SynonymLibrary {
ConcurrentLog log = new ConcurrentLog(SynonymLibrary.class.getName()); private final static ConcurrentLog log = new ConcurrentLog(SynonymLibrary.class.getName());
private Map<String, List<Set<String>>> lib; private Map<String, List<Set<String>>> lib;
public SynonymLibrary(final File path) { public SynonymLibrary(final File path) {

@ -36,7 +36,7 @@ public class ResponseHeader extends HeaderFramework {
// response header properties // response header properties
private static final long serialVersionUID = 0L; private static final long serialVersionUID = 0L;
private static ConcurrentLog log = new ConcurrentLog(ResponseHeader.class.getName()); private static final ConcurrentLog log = new ConcurrentLog(ResponseHeader.class.getName());
public ResponseHeader(final int statusCode) { public ResponseHeader(final int statusCode) {
super(); super();

@ -71,7 +71,7 @@ import net.yacy.cora.util.ConcurrentLog;
public class FTPClient { public class FTPClient {
private static ConcurrentLog log = new ConcurrentLog("FTPClient"); private static final ConcurrentLog log = new ConcurrentLog("FTPClient");
private static final String vDATE = "20100823"; private static final String vDATE = "20100823";

@ -63,7 +63,6 @@ import net.yacy.peers.SeedDB;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine; import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
@ -72,8 +71,8 @@ public final class CrawlStacker {
public static String ERROR_NO_MATCH_MUST_MATCH_FILTER = "url does not match must-match filter "; public static String ERROR_NO_MATCH_MUST_MATCH_FILTER = "url does not match must-match filter ";
public static String ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER = "url matches must-not-match filter "; public static String ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER = "url matches must-not-match filter ";
private final static ConcurrentLog log = new ConcurrentLog("STACKCRAWL");
private final ConcurrentLog log = new ConcurrentLog("STACKCRAWL");
private final RobotsTxt robots; private final RobotsTxt robots;
private final WorkflowProcessor<Request> requestQueue; private final WorkflowProcessor<Request> requestQueue;
public final CrawlQueues nextQueue; public final CrawlQueues nextQueue;
@ -103,13 +102,13 @@ public final class CrawlStacker {
this.acceptGlobalURLs = acceptGlobalURLs; this.acceptGlobalURLs = acceptGlobalURLs;
this.domainList = domainList; this.domainList = domainList;
this.requestQueue = new WorkflowProcessor<Request>("CrawlStacker", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU); this.requestQueue = new WorkflowProcessor<Request>("CrawlStacker", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU);
this.log.info("STACKCRAWL thread initialized."); CrawlStacker.log.info("STACKCRAWL thread initialized.");
} }
public int size() { public int size() {
return this.requestQueue.getQueueSize(); return this.requestQueue.getQueueSize();
} }
public boolean isEmpty() { public boolean isEmpty() {
if (!this.requestQueue.queueIsEmpty()) return false; if (!this.requestQueue.queueIsEmpty()) return false;
return true; return true;
@ -120,15 +119,15 @@ public final class CrawlStacker {
} }
public void announceClose() { public void announceClose() {
this.log.info("Flushing remaining " + size() + " crawl stacker job entries."); CrawlStacker.log.info("Flushing remaining " + size() + " crawl stacker job entries.");
this.requestQueue.shutdown(); this.requestQueue.shutdown();
} }
public synchronized void close() { public synchronized void close() {
this.log.info("Shutdown. waiting for remaining " + size() + " crawl stacker job entries. please wait."); CrawlStacker.log.info("Shutdown. waiting for remaining " + size() + " crawl stacker job entries. please wait.");
this.requestQueue.shutdown(); this.requestQueue.shutdown();
this.log.info("Shutdown. Closing stackCrawl queue."); CrawlStacker.log.info("Shutdown. Closing stackCrawl queue.");
clear(); clear();
} }
@ -155,7 +154,7 @@ public final class CrawlStacker {
this.nextQueue.errorURL.push(entry.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1); this.nextQueue.errorURL.push(entry.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
} }
} catch (final Exception e) { } catch (final Exception e) {
CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e); CrawlStacker.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
return null; return null;
} }
return null; return null;
@ -164,7 +163,7 @@ public final class CrawlStacker {
public void enqueueEntry(final Request entry) { public void enqueueEntry(final Request entry) {
// DEBUG // DEBUG
if (this.log.isFinest()) this.log.finest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth()); if (CrawlStacker.log.isFinest()) CrawlStacker.log.finest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth());
this.requestQueue.enQueue(entry); this.requestQueue.enQueue(entry);
} }
public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final List<AnchorURL> hyperlinks) { public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final List<AnchorURL> hyperlinks) {
@ -310,7 +309,7 @@ public final class CrawlStacker {
String error; String error;
if (profile == null) { if (profile == null) {
error = "LOST STACKER PROFILE HANDLE '" + entry.profileHandle() + "' for URL " + entry.url(); error = "LOST STACKER PROFILE HANDLE '" + entry.profileHandle() + "' for URL " + entry.url();
this.log.warn(error); CrawlStacker.log.warn(error);
return error; return error;
} }
@ -334,7 +333,7 @@ public final class CrawlStacker {
if (!local && !global && !remote && !proxy) { if (!local && !global && !remote && !proxy) {
error = "URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", profile.handle = " + profile.handle(); error = "URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", profile.handle = " + profile.handle();
this.log.severe(error); CrawlStacker.log.severe(error);
return error; return error;
} }
@ -362,20 +361,20 @@ public final class CrawlStacker {
if (global) { if (global) {
// it may be possible that global == true and local == true, so do not check an error case against it // it may be possible that global == true and local == true, so do not check an error case against it
if (proxy) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle()); if (proxy) CrawlStacker.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle());
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle()); if (remote) CrawlStacker.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: global = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.GLOBAL, entry, profile, this.robots); warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.GLOBAL, entry, profile, this.robots);
} else if (local) { } else if (local) {
if (proxy) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle()); if (proxy) CrawlStacker.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, proxy = true, initiator = proxy" + ", profile.handle = " + profile.handle());
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle()); if (remote) CrawlStacker.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: local = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots); warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots);
} else if (proxy) { } else if (proxy) {
if (remote) this.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle()); if (remote) CrawlStacker.log.warn("URL '" + entry.url().toString() + "' has conflicting initiator properties: proxy = true, remote = true, initiator = " + ASCII.String(entry.initiator()) + ", profile.handle = " + profile.handle());
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots); warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.LOCAL, entry, profile, this.robots);
} else if (remote) { } else if (remote) {
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry, profile, this.robots); warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry, profile, this.robots);
} }
if (warning != null && this.log.isFine()) this.log.fine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true) + " - not pushed: " + warning); if (warning != null && CrawlStacker.log.isFine()) CrawlStacker.log.fine("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true) + " - not pushed: " + warning);
return null; return null;
} }
@ -405,8 +404,8 @@ public final class CrawlStacker {
} else { } else {
final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime(); final boolean recrawl = profile.recrawlIfOlder() > oldDate.getTime();
if (recrawl) { if (recrawl) {
if (this.log.isInfo()) if (CrawlStacker.log.isInfo())
this.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " + CrawlStacker.log.info("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago."); ((System.currentTimeMillis() - oldDate.getTime()) / 60000 / 60 / 24) + " days ago.");
} else { } else {
if (dbocc == null) { if (dbocc == null) {
@ -414,10 +413,10 @@ public final class CrawlStacker {
} }
if (dbocc == HarvestProcess.ERRORS) { if (dbocc == HarvestProcess.ERRORS) {
final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash())); final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason()); if (CrawlStacker.log.isInfo()) CrawlStacker.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason());
return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString(); return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString();
} }
if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. "); if (CrawlStacker.log.isInfo()) CrawlStacker.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. ");
return "double in: " + dbocc.toString() + ", oldDate = " + oldDate.toString(); return "double in: " + dbocc.toString() + ", oldDate = " + oldDate.toString();
} }
} }
@ -427,7 +426,7 @@ public final class CrawlStacker {
if (maxAllowedPagesPerDomain < Integer.MAX_VALUE && maxAllowedPagesPerDomain > 0) { if (maxAllowedPagesPerDomain < Integer.MAX_VALUE && maxAllowedPagesPerDomain > 0) {
final AtomicInteger dp = profile.getCount(url.getHost()); final AtomicInteger dp = profile.getCount(url.getHost());
if (dp != null && dp.get() >= maxAllowedPagesPerDomain) { if (dp != null && dp.get() >= maxAllowedPagesPerDomain) {
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed.");
return "crawl stack domain counter exceeded (test by profile)"; return "crawl stack domain counter exceeded (test by profile)";
} }
@ -455,44 +454,44 @@ public final class CrawlStacker {
final String urlProtocol = url.getProtocol(); final String urlProtocol = url.getProtocol();
final String urlstring = url.toString(); final String urlstring = url.toString();
if (!Switchboard.getSwitchboard().loader.isSupportedProtocol(urlProtocol)) { if (!Switchboard.getSwitchboard().loader.isSupportedProtocol(urlProtocol)) {
this.log.severe("Unsupported protocol in URL '" + urlstring + "'."); CrawlStacker.log.severe("Unsupported protocol in URL '" + urlstring + "'.");
return "unsupported protocol"; return "unsupported protocol";
} }
// check if ip is local ip address // check if ip is local ip address
final String urlRejectReason = urlInAcceptedDomain(url); final String urlRejectReason = urlInAcceptedDomain(url);
if (urlRejectReason != null) { if (urlRejectReason != null) {
if (this.log.isFine()) this.log.fine("denied_(" + urlRejectReason + ")"); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("denied_(" + urlRejectReason + ")");
return "denied_(" + urlRejectReason + ")"; return "denied_(" + urlRejectReason + ")";
} }
// check blacklist // check blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
this.log.fine("URL '" + urlstring + "' is in blacklist."); CrawlStacker.log.fine("URL '" + urlstring + "' is in blacklist.");
return "url in blacklist"; return "url in blacklist";
} }
// filter with must-match for URLs // filter with must-match for URLs
if ((depth > 0) && !profile.urlMustMatchPattern().matcher(urlstring).matches()) { if ((depth > 0) && !profile.urlMustMatchPattern().matcher(urlstring).matches()) {
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' does not match must-match crawling filter '" + profile.urlMustMatchPattern().toString() + "'."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("URL '" + urlstring + "' does not match must-match crawling filter '" + profile.urlMustMatchPattern().toString() + "'.");
return ERROR_NO_MATCH_MUST_MATCH_FILTER + profile.urlMustMatchPattern().toString(); return ERROR_NO_MATCH_MUST_MATCH_FILTER + profile.urlMustMatchPattern().toString();
} }
// filter with must-not-match for URLs // filter with must-not-match for URLs
if ((depth > 0) && profile.urlMustNotMatchPattern().matcher(urlstring).matches()) { if ((depth > 0) && profile.urlMustNotMatchPattern().matcher(urlstring).matches()) {
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.urlMustNotMatchPattern().toString() + "'."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.urlMustNotMatchPattern().toString() + "'.");
return ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER + profile.urlMustNotMatchPattern().toString(); return ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER + profile.urlMustNotMatchPattern().toString();
} }
// deny cgi // deny cgi
if (url.isIndividual() && !profile.crawlingQ()) { // TODO: make special property for crawlingIndividual if (url.isIndividual() && !profile.crawlingQ()) { // TODO: make special property for crawlingIndividual
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' is CGI URL."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("URL '" + urlstring + "' is CGI URL.");
return "individual url (sessionid etc) not wanted"; return "individual url (sessionid etc) not wanted";
} }
// deny post properties // deny post properties
if (url.isPOST() && !profile.crawlingQ()) { if (url.isPOST() && !profile.crawlingQ()) {
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' is post URL."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("URL '" + urlstring + "' is post URL.");
return "post url not allowed"; return "post url not allowed";
} }
@ -501,13 +500,13 @@ public final class CrawlStacker {
// filter with must-match for IPs // filter with must-match for IPs
if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_ALL_PATTERN && url.getHost() != null && !profile.ipMustMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_ALL_PATTERN && url.getHost() != null && !profile.ipMustMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) {
if (this.log.isFine()) this.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'.");
return "ip " + url.getInetAddress().getHostAddress() + " of url does not match must-match filter"; return "ip " + url.getInetAddress().getHostAddress() + " of url does not match must-match filter";
} }
// filter with must-not-match for IPs // filter with must-not-match for IPs
if ((depth > 0) && profile.ipMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && url.getHost() != null && profile.ipMustNotMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { if ((depth > 0) && profile.ipMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && url.getHost() != null && profile.ipMustNotMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) {
if (this.log.isFine()) this.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.ipMustNotMatchPattern().toString() + "'."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.ipMustNotMatchPattern().toString() + "'.");
return "ip " + url.getInetAddress().getHostAddress() + " of url matches must-not-match filter"; return "ip " + url.getInetAddress().getHostAddress() + " of url matches must-not-match filter";
} }
@ -525,7 +524,7 @@ public final class CrawlStacker {
} }
} }
if (!granted) { if (!granted) {
if (this.log.isFine()) this.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'."); if (CrawlStacker.log.isFine()) CrawlStacker.log.fine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'.");
return "country " + c0 + " of url does not match must-match filter for countries"; return "country " + c0 + " of url does not match must-match filter for countries";
} }
} }

@ -62,9 +62,10 @@ import net.yacy.search.index.ErrorCache;
import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionConfiguration;
public class CrawlQueues { public class CrawlQueues {
private final static ConcurrentLog log = new ConcurrentLog("CRAWLER");
private Switchboard sb; private Switchboard sb;
private ConcurrentLog log;
private Map<Integer, Loader> workers; // mapping from url hash to Worker thread object private Map<Integer, Loader> workers; // mapping from url hash to Worker thread object
private final ArrayList<String> remoteCrawlProviderHashes; private final ArrayList<String> remoteCrawlProviderHashes;
@ -74,12 +75,11 @@ public class CrawlQueues {
public CrawlQueues(final Switchboard sb, final File queuePath) { public CrawlQueues(final Switchboard sb, final File queuePath) {
this.sb = sb; this.sb = sb;
this.log = new ConcurrentLog("CRAWLER");
this.workers = new ConcurrentHashMap<Integer, Loader>(); this.workers = new ConcurrentHashMap<Integer, Loader>();
this.remoteCrawlProviderHashes = new ArrayList<String>(); this.remoteCrawlProviderHashes = new ArrayList<String>();
// start crawling management // start crawling management
this.log.config("Starting Crawling Management"); log.config("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727); this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ErrorCache(sb.index.fulltext()); this.errorURL = new ErrorCache(sb.index.fulltext());
this.delegatedURL = new ConcurrentHashMap<String, DigestURL>(); this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
@ -214,7 +214,7 @@ public class CrawlQueues {
for (int i = 0; i < toshift; i++) { for (int i = 0; i < toshift; i++) {
this.noticeURL.shift(NoticedURL.StackType.GLOBAL, NoticedURL.StackType.LOCAL, this.sb.crawler, this.sb.robots); this.noticeURL.shift(NoticedURL.StackType.GLOBAL, NoticedURL.StackType.LOCAL, this.sb.crawler, this.sb.robots);
} }
this.log.info("shifted " + toshift + " jobs from global crawl to local crawl (coreCrawlJobSize()=" + coreCrawlJobSize() + CrawlQueues.log.info("shifted " + toshift + " jobs from global crawl to local crawl (coreCrawlJobSize()=" + coreCrawlJobSize() +
", limitCrawlJobSize()=" + limitCrawlJobSize() + ", cluster.mode=" + this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "") + ", limitCrawlJobSize()=" + limitCrawlJobSize() + ", cluster.mode=" + this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "") +
", robinsonMode=" + ((this.sb.isRobinsonMode()) ? "on" : "off")); ", robinsonMode=" + ((this.sb.isRobinsonMode()) ? "on" : "off"));
} }
@ -222,15 +222,15 @@ public class CrawlQueues {
final String queueCheckCore = loadIsPossible(NoticedURL.StackType.LOCAL); final String queueCheckCore = loadIsPossible(NoticedURL.StackType.LOCAL);
final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD); final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD);
if (queueCheckCore != null && queueCheckNoload != null) { if (queueCheckCore != null && queueCheckNoload != null) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload); CrawlQueues.log.fine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
} }
return false; return false;
} }
if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) { if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("omitting de-queue/local: paused"); CrawlQueues.log.fine("omitting de-queue/local: paused");
} }
return false; return false;
} }
@ -252,12 +252,12 @@ public class CrawlQueues {
} }
final String profileHandle = urlEntry.profileHandle(); final String profileHandle = urlEntry.profileHandle();
if (profileHandle == null) { if (profileHandle == null) {
this.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url()); CrawlQueues.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
return true; return true;
} }
final CrawlProfile profile = this.sb.crawler.get(ASCII.getBytes(profileHandle)); final CrawlProfile profile = this.sb.crawler.get(ASCII.getBytes(profileHandle));
if (profile == null) { if (profile == null) {
this.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url()); CrawlQueues.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
return true; return true;
} }
this.sb.indexingDocumentProcessor.enQueue(new IndexingQueueEntry(new Response(urlEntry, profile), null, null)); this.sb.indexingDocumentProcessor.enQueue(new IndexingQueueEntry(new Response(urlEntry, profile), null, null));
@ -272,13 +272,13 @@ public class CrawlQueues {
// System.out.println("DEBUG plasmaSwitchboard.processCrawling: // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url()); // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
if (urlEntry.profileHandle() == null) { if (urlEntry.profileHandle() == null) {
this.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url()); CrawlQueues.log.severe(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
return true; return true;
} }
load(urlEntry, stats); load(urlEntry, stats);
return true; return true;
} catch (final IOException e) { } catch (final IOException e) {
this.log.severe(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e); CrawlQueues.log.severe(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) { if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.LOCAL); this.noticeURL.clear(NoticedURL.StackType.LOCAL);
} }
@ -303,8 +303,8 @@ public class CrawlQueues {
final DigestURL url = urlEntry.url(); final DigestURL url = urlEntry.url();
final String urlProtocol = url.getProtocol(); final String urlProtocol = url.getProtocol();
if (this.sb.loader.isSupportedProtocol(urlProtocol)) { if (this.sb.loader.isSupportedProtocol(urlProtocol)) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine(stats + ": URL=" + urlEntry.url() CrawlQueues.log.fine(stats + ": URL=" + urlEntry.url()
+ ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator())) + ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator()))
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
+ ", depth=" + urlEntry.depth() + ", depth=" + urlEntry.depth()
@ -316,7 +316,7 @@ public class CrawlQueues {
// work off one Crawl stack entry // work off one Crawl stack entry
if (urlEntry == null || urlEntry.url() == null) { if (urlEntry == null || urlEntry.url() == null) {
this.log.info(stats + ": urlEntry = null"); CrawlQueues.log.info(stats + ": urlEntry = null");
} else { } else {
if (!this.workers.containsKey(Integer.valueOf(urlEntry.hashCode()))) { if (!this.workers.containsKey(Integer.valueOf(urlEntry.hashCode()))) {
Loader loader = new Loader(urlEntry); Loader loader = new Loader(urlEntry);
@ -331,10 +331,10 @@ public class CrawlQueues {
} }
} else { } else {
this.log.severe("Unsupported protocol in URL '" + url.toString()); CrawlQueues.log.severe("Unsupported protocol in URL '" + url.toString());
} }
} else { } else {
if (this.log.isFine()) this.log.fine(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url()); if (CrawlQueues.log.isFine()) CrawlQueues.log.fine(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
} }
} }
@ -407,30 +407,30 @@ public class CrawlQueues {
} }
// check again // check again
if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) { if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount()); CrawlQueues.log.fine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
} }
return false; return false;
} }
final String cautionCause = this.sb.onlineCaution(); final String cautionCause = this.sb.onlineCaution();
if (cautionCause != null) { if (cautionCause != null) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing"); CrawlQueues.log.fine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
} }
return false; return false;
} }
if (remoteTriggeredCrawlJobSize() > 200) { if (remoteTriggeredCrawlJobSize() > 200) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing"); CrawlQueues.log.fine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
} }
return false; return false;
} }
if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) { if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) {
if (this.log.isFine()) { if (CrawlQueues.log.isFine()) {
this.log.fine("remoteCrawlLoaderJob: a local crawl is running, omitting processing"); CrawlQueues.log.fine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
} }
return false; return false;
} }
@ -521,7 +521,7 @@ public class CrawlQueues {
item.getSize() item.getSize()
)); ));
} else { } else {
this.log.warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason); CrawlQueues.log.warn("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
} }
} }
return true; return true;
@ -554,15 +554,15 @@ public class CrawlQueues {
// or there is no global crawl on the stack // or there is no global crawl on the stack
final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE); final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE);
if (queueCheck != null) { if (queueCheck != null) {
if (this.log.isFinest()) { if (CrawlQueues.log.isFinest()) {
this.log.finest("omitting de-queue/remote: " + queueCheck); CrawlQueues.log.finest("omitting de-queue/remote: " + queueCheck);
} }
return false; return false;
} }
if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
if (this.log.isFinest()) { if (CrawlQueues.log.isFinest()) {
this.log.finest("omitting de-queue/remote: paused"); CrawlQueues.log.finest("omitting de-queue/remote: paused");
} }
return false; return false;
} }
@ -576,7 +576,7 @@ public class CrawlQueues {
load(urlEntry, stats); load(urlEntry, stats);
return true; return true;
} catch (final IOException e) { } catch (final IOException e) {
this.log.severe(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e); CrawlQueues.log.severe(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) { if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.REMOTE); this.noticeURL.clear(NoticedURL.StackType.REMOTE);
} }
@ -632,8 +632,8 @@ public class CrawlQueues {
final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, this.profile.getAgent()); final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, this.profile.getAgent());
if (response == null) { if (response == null) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED); this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) { if (CrawlQueues.log.isFine()) {
CrawlQueues.this.log.fine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)"); CrawlQueues.log.fine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
} }
result = "no content (possibly caused by cache policy)"; result = "no content (possibly caused by cache policy)";
} else { } else {
@ -644,8 +644,8 @@ public class CrawlQueues {
} }
} catch (final IOException e) { } catch (final IOException e) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED); this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) { if (CrawlQueues.log.isFine()) {
CrawlQueues.this.log.fine("problem loading " + this.request.url().toString() + ": " + e.getMessage()); CrawlQueues.log.fine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
} }
result = "load error - " + e.getMessage(); result = "load error - " + e.getMessage();
} }

@ -54,7 +54,7 @@ import net.yacy.repository.Blacklist.BlacklistType;
public class RobotsTxt { public class RobotsTxt {
private static ConcurrentLog log = new ConcurrentLog(RobotsTxt.class.getName()); private final static ConcurrentLog log = new ConcurrentLog(RobotsTxt.class.getName());
protected static final String ROBOTS_DB_PATH_SEPARATOR = ";"; protected static final String ROBOTS_DB_PATH_SEPARATOR = ";";
protected static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR); protected static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);

@ -41,7 +41,7 @@ import net.yacy.cora.util.ConcurrentLog;
*/ */
public class LanguageStatistics { public class LanguageStatistics {
private static ConcurrentLog logger = new ConcurrentLog("LANGUAGESTATISTICS"); private final static ConcurrentLog logger = new ConcurrentLog("LANGUAGESTATISTICS");
/** This variable holds the name of the language. */ /** This variable holds the name of the language. */
private String langName = null; private String langName = null;

@ -56,7 +56,7 @@ public class Switchboard {
*/ */
private static Properties properties = new Properties(); private static Properties properties = new Properties();
public static ConcurrentLog log = new ConcurrentLog(Switchboard.class.getName()); public final static ConcurrentLog log = new ConcurrentLog(Switchboard.class.getName());
public static void startInfoUpdater() { public static void startInfoUpdater() {

@ -102,6 +102,8 @@ public final class OS {
return maxmem; return maxmem;
} }
private final static ConcurrentLog memchecklog = new ConcurrentLog("MEMCHECK");
/** /**
* checks heap (may cause high system load) * checks heap (may cause high system load)
* @param mem heap to check in -Xmx<i>[heap]</i>m * @param mem heap to check in -Xmx<i>[heap]</i>m
@ -115,7 +117,7 @@ public final class OS {
processArgs.add("-Xms4m"); processArgs.add("-Xms4m");
processArgs.add("-Xmx" + Integer.toString(mem) + "m"); processArgs.add("-Xmx" + Integer.toString(mem) + "m");
try { try {
line = ConsoleInterface.getLastLineConsoleOutput(processArgs, new ConcurrentLog("MEMCHECK")); line = ConsoleInterface.getLastLineConsoleOutput(processArgs, memchecklog);
} catch (final IOException e) { } catch (final IOException e) {
return false; return false;
} }

@ -67,6 +67,7 @@ import net.yacy.search.Switchboard;
public final class LoaderDispatcher { public final class LoaderDispatcher {
private final static int accessTimeMaxsize = 1000; private final static int accessTimeMaxsize = 1000;
private final static ConcurrentLog log = new ConcurrentLog("LOADER");
private static final ConcurrentHashMap<String, Long> accessTime = new ConcurrentHashMap<String, Long>(); // to protect targets from DDoS private static final ConcurrentHashMap<String, Long> accessTime = new ConcurrentHashMap<String, Long>(); // to protect targets from DDoS
private final Switchboard sb; private final Switchboard sb;
@ -76,18 +77,16 @@ public final class LoaderDispatcher {
private final SMBLoader smbLoader; private final SMBLoader smbLoader;
private final FileLoader fileLoader; private final FileLoader fileLoader;
private final ConcurrentHashMap<DigestURL, Semaphore> loaderSteering; // a map that delivers a 'finish' semaphore for urls private final ConcurrentHashMap<DigestURL, Semaphore> loaderSteering; // a map that delivers a 'finish' semaphore for urls
private final ConcurrentLog log;
public LoaderDispatcher(final Switchboard sb) { public LoaderDispatcher(final Switchboard sb) {
this.sb = sb; this.sb = sb;
this.supportedProtocols = new HashSet<String>(Arrays.asList(new String[]{"http","https","ftp","smb","file"})); this.supportedProtocols = new HashSet<String>(Arrays.asList(new String[]{"http","https","ftp","smb","file"}));
// initiate loader objects // initiate loader objects
this.log = new ConcurrentLog("LOADER"); this.httpLoader = new HTTPLoader(sb, LoaderDispatcher.log);
this.httpLoader = new HTTPLoader(sb, this.log); this.ftpLoader = new FTPLoader(sb, LoaderDispatcher.log);
this.ftpLoader = new FTPLoader(sb, this.log); this.smbLoader = new SMBLoader(sb, LoaderDispatcher.log);
this.smbLoader = new SMBLoader(sb, this.log); this.fileLoader = new FileLoader(sb, LoaderDispatcher.log);
this.fileLoader = new FileLoader(sb, this.log);
this.loaderSteering = new ConcurrentHashMap<DigestURL, Semaphore>(); this.loaderSteering = new ConcurrentHashMap<DigestURL, Semaphore>();
} }
@ -224,7 +223,7 @@ public final class LoaderDispatcher {
// well, just take the cache and don't care about freshness of the content // well, just take the cache and don't care about freshness of the content
final byte[] content = Cache.getContent(url.hash()); final byte[] content = Cache.getContent(url.hash());
if (content != null) { if (content != null) {
this.log.info("cache hit/useall for: " + url.toNormalform(true)); LoaderDispatcher.log.info("cache hit/useall for: " + url.toNormalform(true));
response.setContent(content); response.setContent(content);
return response; return response;
} }
@ -235,14 +234,14 @@ public final class LoaderDispatcher {
if (response.isFreshForProxy()) { if (response.isFreshForProxy()) {
final byte[] content = Cache.getContent(url.hash()); final byte[] content = Cache.getContent(url.hash());
if (content != null) { if (content != null) {
this.log.info("cache hit/fresh for: " + url.toNormalform(true)); LoaderDispatcher.log.info("cache hit/fresh for: " + url.toNormalform(true));
response.setContent(content); response.setContent(content);
return response; return response;
} }
} }
this.log.info("cache hit/stale for: " + url.toNormalform(true)); LoaderDispatcher.log.info("cache hit/stale for: " + url.toNormalform(true));
} else if (cachedResponse != null) { } else if (cachedResponse != null) {
this.log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true)); LoaderDispatcher.log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
} }
} }
@ -266,7 +265,7 @@ public final class LoaderDispatcher {
cleanupAccessTimeTable(untilTime); cleanupAccessTimeTable(untilTime);
if (System.currentTimeMillis() < untilTime) { if (System.currentTimeMillis() < untilTime) {
long frcdslp = untilTime - System.currentTimeMillis(); long frcdslp = untilTime - System.currentTimeMillis();
this.log.info("Forcing sleep of " + frcdslp + " ms for host " + host); LoaderDispatcher.log.info("Forcing sleep of " + frcdslp + " ms for host " + host);
try {Thread.sleep(frcdslp);} catch (final InterruptedException ee) {} try {Thread.sleep(frcdslp);} catch (final InterruptedException ee) {}
} }
} }
@ -310,10 +309,10 @@ public final class LoaderDispatcher {
try { try {
Cache.store(url, response.getResponseHeader(), response.getContent()); Cache.store(url, response.getResponseHeader(), response.getContent());
} catch (final IOException e) { } catch (final IOException e) {
this.log.warn("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e); LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e);
} }
} else { } else {
this.log.warn("cannot write " + response.url() + " to Cache (4): " + storeError); LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (4): " + storeError);
} }
return response; return response;
} }

@ -46,7 +46,7 @@ import net.yacy.search.schema.CollectionSchema;
public class ErrorCache { public class ErrorCache {
private static ConcurrentLog log = new ConcurrentLog("REJECTED"); private static final ConcurrentLog log = new ConcurrentLog("REJECTED");
private static final int maxStackSize = 1000; private static final int maxStackSize = 1000;
// the class object // the class object

@ -113,7 +113,7 @@ public final class SearchEvent {
} }
*/ */
public static ConcurrentLog log = new ConcurrentLog("SEARCH"); public final static ConcurrentLog log = new ConcurrentLog("SEARCH");
public static final int SNIPPET_MAX_LENGTH = 220; public static final int SNIPPET_MAX_LENGTH = 220;
private static final int MAX_TOPWORDS = 12; // default count of words for topicnavigagtor private static final int MAX_TOPWORDS = 12; // default count of words for topicnavigagtor

@ -44,7 +44,7 @@ import net.yacy.cora.util.ConcurrentLog;
public class gzip { public class gzip {
private static ConcurrentLog logger = new ConcurrentLog("GZIP"); private final static ConcurrentLog logger = new ConcurrentLog("GZIP");
public static void gzipFile(final String inFile, final String outFile) { public static void gzipFile(final String inFile, final String outFile) {
try { try {

Loading…
Cancel
Save