From 6aa474f52968b6281e84d86a07344dc8869b2c30 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 1 Oct 2009 13:08:19 +0000 Subject: [PATCH] - better logging for web cache access and fail reasons - better Exception handling for web cache access - distinction between access of web cache for proxy and crawler git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6367 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ViewFile.java | 15 ++- .../crawler/retrieval/LoaderDispatcher.java | 18 +++- .../de/anomic/crawler/retrieval/Response.java | 54 ++++++---- source/de/anomic/http/client/Cache.java | 56 +++++++--- .../anomic/http/server/HTTPDProxyHandler.java | 102 ++++++++++-------- source/de/anomic/search/Switchboard.java | 18 +++- source/de/anomic/server/serverCore.java | 4 +- source/de/anomic/ymage/ymageOSM.java | 7 +- 8 files changed, 178 insertions(+), 96 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index a24f5a1c4..ae5db1575 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -25,6 +25,7 @@ //javac -classpath .:../Classes Status.java //if the shell's current path is HTROOT +import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; @@ -145,7 +146,12 @@ public class ViewFile { ResponseHeader responseHeader = null; String resMime = null; // trying to load the resource body - resource = Cache.getContentStream(url); + try { + resource = Cache.getContentStream(url); + } catch (IOException e1) { + e1.printStackTrace(); + resource = null; + } resourceLength = Cache.getResourceContentLength(url); responseHeader = Cache.getResponseHeader(url); @@ -162,7 +168,12 @@ public class ViewFile { } if (entry != null) { - resource = Cache.getContentStream(url); + try { + resource = Cache.getContentStream(url); + } catch (IOException e1) { + e1.printStackTrace(); + resource = null; + } resourceLength = Cache.getResourceContentLength(url); } diff --git a/source/de/anomic/crawler/retrieval/LoaderDispatcher.java b/source/de/anomic/crawler/retrieval/LoaderDispatcher.java index 02feb6c2d..fcee4500a 100644 --- a/source/de/anomic/crawler/retrieval/LoaderDispatcher.java +++ b/source/de/anomic/crawler/retrieval/LoaderDispatcher.java @@ -152,7 +152,13 @@ public final class LoaderDispatcher { // now see if there is a cache entry ResponseHeader cachedResponse = (request.url().isLocal()) ? null : Cache.getResponseHeader(request.url()); - byte[] content = (cachedResponse == null) ? null : Cache.getContent(request.url()); + byte[] content = null; + try { + content = (cachedResponse == null) ? null : Cache.getContent(request.url()); + } catch (IOException e) { + e.printStackTrace(); + content = null; + } if (cachedResponse != null && content != null) { // yes we have the content @@ -221,11 +227,15 @@ public final class LoaderDispatcher { if (protocol.equals("ftp")) response = ftpLoader.load(request); if (response != null) { // we got something. Now check if we want to store that to the cache - String storeError = response.shallStoreCache(); + String storeError = response.shallStoreCacheForCrawler(); if (storeError == null) { - Cache.store(request.url(), response.getResponseHeader(), response.getContent()); + try { + Cache.store(request.url(), response.getResponseHeader(), response.getContent()); + } catch (IOException e) { + log.logWarning("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e); + } } else { - if (Cache.log.isFine()) Cache.log.logFine("no storage of url " + request.url() + ": " + storeError); + log.logWarning("cannot write " + response.url() + " to Cache (4): " + storeError); } return response; } diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java index 3e251217d..071383ee3 100755 --- a/source/de/anomic/crawler/retrieval/Response.java +++ b/source/de/anomic/crawler/retrieval/Response.java @@ -253,8 +253,11 @@ public class Response { * @return NULL if the answer is TRUE, in case of FALSE, the reason as * String is returned */ - public String shallStoreCache() { + public String shallStoreCacheForProxy() { + String crawlerReason = shallStoreCacheForCrawler(); + if (crawlerReason != null) return crawlerReason; + // check profile (disabled: we will check this in the plasmaSwitchboard) // if (!this.profile.storeHTCache()) { return "storage_not_wanted"; } @@ -262,15 +265,6 @@ public class Response { // the cache or not // if the storage was requested by prefetching, the request map is null - // check storage size: all files will be handled in RAM before storage, so they must not exceed - // a given size, which we consider as 1MB - if (this.size() > 10 * 1024L * 1024L) return "too_large_for_caching_" + this.size(); - - // check status code - if (!validResponseStatus()) { - return "bad_status_" + this.responseStatus.substring(0, 3); - } - // -CGI access in request // CGI access makes the page very individual, and therefore not usable // in caches @@ -286,20 +280,7 @@ public class Response { return "local_URL_no_cache_needed"; } - if (requestHeader != null) { - // -authorization cases in request - // authorization makes pages very individual, and therefore we cannot use the - // content in the cache - if (requestHeader.containsKey(RequestHeader.AUTHORIZATION)) { return "personalized"; } - // -ranges in request and response - // we do not cache partial content - if (requestHeader.containsKey(HeaderFramework.RANGE)) { return "partial"; } - } - if (responseHeader != null) { - // -ranges in request and response - // we do not cache partial content - if (responseHeader.containsKey(HeaderFramework.CONTENT_RANGE)) { return "partial"; } // -if-modified-since in request // we do not care about if-modified-since, because this case only occurres if the @@ -348,6 +329,33 @@ public class Response { return null; } + public String shallStoreCacheForCrawler() { + // check storage size: all files will be handled in RAM before storage, so they must not exceed + // a given size, which we consider as 1MB + if (this.size() > 10 * 1024L * 1024L) return "too_large_for_caching_" + this.size(); + + // check status code + if (!validResponseStatus()) { + return "bad_status_" + this.responseStatus.substring(0, 3); + } + + if (requestHeader != null) { + // -authorization cases in request + // authorization makes pages very individual, and therefore we cannot use the + // content in the cache + if (requestHeader.containsKey(RequestHeader.AUTHORIZATION)) { return "personalized"; } + // -ranges in request and response + // we do not cache partial content + if (requestHeader.containsKey(HeaderFramework.RANGE)) { return "partial_request"; } + } + + if (responseHeader != null) { + // -ranges in request and response + // we do not cache partial content + if (responseHeader.containsKey(HeaderFramework.CONTENT_RANGE)) { return "partial_response"; } + } + return null; + } /** * decide upon header information if a specific file should be taken from diff --git a/source/de/anomic/http/client/Cache.java b/source/de/anomic/http/client/Cache.java index fe4cf02e6..95c0e1854 100644 --- a/source/de/anomic/http/client/Cache.java +++ b/source/de/anomic/http/client/Cache.java @@ -38,6 +38,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Map; @@ -111,32 +112,55 @@ public final class Cache { fileDB.close(true); } - public static void store(yacyURL url, final ResponseHeader responseHeader, byte[] file) { - if (responseHeader != null && file != null) try { - // store the response header into the header database - final HashMap hm = new HashMap(); - hm.putAll(responseHeader); - hm.put("@@URL", url.toNormalform(true, false)); + public static void store(yacyURL url, final ResponseHeader responseHeader, byte[] file) throws IOException { + if (responseHeader == null) throw new IOException("Cache.store of url " + url.toString() + " not possible: responseHeader == null"); + if (file == null) throw new IOException("Cache.store of url " + url.toString() + " not possible: file == null"); + + // store the response header into the header database + final HashMap hm = new HashMap(); + hm.putAll(responseHeader); + hm.put("@@URL", url.toNormalform(true, false)); + try { responseHeaderDB.put(url.hash(), hm); + } catch (IOException e) { + throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage()); + } + + // store the file + try { fileDB.put(url.hash().getBytes("UTF-8"), file); - if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false)); + } catch (UnsupportedEncodingException e) { + throw new IOException("Cache.store: cannot write to fileDB (1): " + e.getMessage()); } catch (IOException e) { - e.printStackTrace(); + throw new IOException("Cache.store: cannot write to fileDB (2): " + e.getMessage()); } + if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false)); } /** * check if the responseHeaderDB and the fileDB has an entry for the given url * @param url the url of the resource - * @return true if the content of the url is in the cache, false othervise + * @return true if the content of the url is in the cache, false otherwise */ public static boolean has(final yacyURL url) { + boolean headerExists; try { - return responseHeaderDB.has(url.hash()) && fileDB.has(url.hash().getBytes()); - } catch (IOException e) { - e.printStackTrace(); + headerExists = responseHeaderDB.has(url.hash()); + } catch (IOException e1) { + try { + fileDB.remove(url.hash().getBytes()); + } catch (IOException e) {} return false; } + boolean fileExists = fileDB.has(url.hash().getBytes()); + if (headerExists && fileExists) return true; + if (headerExists) try { + responseHeaderDB.remove(url.hash()); + } catch (IOException e) {} + if (fileExists) try { + fileDB.remove(url.hash().getBytes()); + } catch (IOException e) {} + return false; } /** @@ -168,8 +192,9 @@ public final class Cache { * @return the resource content as {@link InputStream}. In no data * is available or the cached file is not readable, null * is returned. + * @throws IOException */ - public static InputStream getContentStream(final yacyURL url) { + public static InputStream getContentStream(final yacyURL url) throws IOException { // load the url as resource from the cache byte[] b = getContent(url); if (b == null) return null; @@ -182,12 +207,13 @@ public final class Cache { * @return the resource content as byte[]. In no data * is available or the cached file is not readable, null * is returned. + * @throws IOException */ - public static byte[] getContent(final yacyURL url) { + public static byte[] getContent(final yacyURL url) throws IOException { // load the url as resource from the cache try { return fileDB.get(url.hash().getBytes("UTF-8")); - } catch (IOException e) { + } catch (UnsupportedEncodingException e) { e.printStackTrace(); return null; } diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index 67cad0175..805f939b7 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -117,7 +117,7 @@ public final class HTTPDProxyHandler { //private Properties connectionProperties = null; // creating a logger - private static final Log theLogger = new Log("PROXY"); + private static final Log log = new Log("PROXY"); private static boolean doAccessLogging = false; /** @@ -126,7 +126,7 @@ public final class HTTPDProxyHandler { static { // Doing logger initialization try { - theLogger.logInfo("Configuring proxy access logging ..."); + log.logInfo("Configuring proxy access logging ..."); // getting the logging manager final LogManager manager = LogManager.getLogManager(); @@ -158,15 +158,15 @@ public final class HTTPDProxyHandler { proxyLogger.addHandler(txtLog); doAccessLogging = true; - theLogger.logInfo("Proxy access logging configuration done." + + log.logInfo("Proxy access logging configuration done." + "\n\tFilename: " + pattern + "\n\tLimit: " + limitStr + "\n\tCount: " + countStr); } else { - theLogger.logInfo("Proxy access logging is deactivated."); + log.logInfo("Proxy access logging is deactivated."); } } catch (final Exception e) { - theLogger.logSevere("Unable to configure proxy access logging.",e); + log.logSevere("Unable to configure proxy access logging.",e); } sb = Switchboard.getSwitchboard(); @@ -192,7 +192,7 @@ public final class HTTPDProxyHandler { final String f = sb.getConfig("proxyYellowList", null); if (f != null) { yellowList = FileUtils.loadList(new File(f)); - theLogger.logConfig("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries"); + log.logConfig("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries"); } else { yellowList = new HashSet(); } @@ -307,8 +307,8 @@ public final class HTTPDProxyHandler { yacyURL url = null; try { url = HeaderFramework.getRequestURL(conProp); - if (theLogger.isFine()) theLogger.logFine(reqID +" GET "+ url); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" header: "+ requestHeader); + if (log.isFine()) log.logFine(reqID +" GET "+ url); + if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); //redirector if (redirectorEnabled){ @@ -322,7 +322,7 @@ public final class HTTPDProxyHandler { url = new yacyURL(newUrl, null); } catch(final MalformedURLException e){}//just keep the old one } - if (theLogger.isFinest()) theLogger.logFinest(reqID +" using redirector to "+ url); + if (log.isFinest()) log.logFinest(reqID +" using redirector to "+ url); conProp.setProperty(HeaderFramework.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort()); conProp.setProperty(HeaderFramework.CONNECTION_PROP_PATH, url.getPath()); requestHeader.put(HeaderFramework.HOST, url.getHost()+":"+url.getPort()); @@ -331,7 +331,7 @@ public final class HTTPDProxyHandler { } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; - theLogger.logSevere(errorMsg); + log.logSevere(errorMsg); HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); return; } @@ -349,7 +349,7 @@ public final class HTTPDProxyHandler { final String hostlow = host.toLowerCase(); if (args != null) { path = path + "?" + args; } if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { - theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); + log.logInfo("AGIS blocking of host '" + hostlow + "'"); HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); return; @@ -383,7 +383,7 @@ public final class HTTPDProxyHandler { // in two of these cases we trigger a scheduler to handle newly arrived files: // case 1 and case 3 if (cachedResponseHeader == null) { - if (theLogger.isFinest()) theLogger.logFinest(reqID + " page not in cache: fulfill request from web"); + if (log.isFinest()) log.logFinest(reqID + " page not in cache: fulfill request from web"); fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond); } else { final Request request = new Request( @@ -406,10 +406,10 @@ public final class HTTPDProxyHandler { ); byte[] cacheContent = Cache.getContent(url); if (cacheContent != null && response.isFreshForProxy()) { - if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from cache"); + if (log.isFinest()) log.logFinest(reqID + " fulfill request from cache"); fulfillRequestFromCache(conProp, url, requestHeader, cachedResponseHeader, cacheContent, countedRespond); } else { - if (theLogger.isFinest()) theLogger.logFinest(reqID + " fulfill request from web"); + if (log.isFinest()) log.logFinest(reqID + " fulfill request from web"); fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond); } } @@ -423,7 +423,7 @@ public final class HTTPDProxyHandler { } else if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { final String errorMsg = "Unexpected Error. " + e.getClass().getName() + ": " + e.getMessage(); HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); - theLogger.logSevere(errorMsg); + log.logSevere(errorMsg); } else { forceConnectionClose(conProp); } @@ -484,7 +484,7 @@ public final class HTTPDProxyHandler { // send request try { res = client.GET(getUrl); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" response status: "+ res.getStatusLine()); + if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine()); conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader); final ResponseHeader responseHeader = res.getResponseHeader(); @@ -528,7 +528,7 @@ public final class HTTPDProxyHandler { responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); } - if (theLogger.isFinest()) theLogger.logFinest(reqID +" sending response header: "+ responseHeader); + if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); HTTPDemon.sendRespondHeader( conProp, respond, @@ -547,7 +547,7 @@ public final class HTTPDProxyHandler { res.getStatusLine(), sb.crawler.defaultProxyProfile ); - final String storeError = response.shallStoreCache(); + final String storeError = response.shallStoreCacheForProxy(); final boolean storeHTCache = response.profile().storeHTCache(); final String supportError = Parser.supports(response.url(), response.getMimeType()); if ( @@ -575,13 +575,17 @@ public final class HTTPDProxyHandler { } else { cacheArray = null; } - if (theLogger.isFine()) theLogger.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length))); + if (log.isFine()) log.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length))); if (sizeBeforeDelete == -1) { // totally fresh file response.setContent(cacheArray); - Cache.store(response.url(), response.getResponseHeader(), cacheArray); - sb.toIndexer(response); + try { + Cache.store(response.url(), response.getResponseHeader(), cacheArray); + sb.toIndexer(response); + } catch (IOException e) { + log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e); + } conProp.setProperty(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS"); } else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) { // before we came here we deleted a cache entry @@ -591,13 +595,17 @@ public final class HTTPDProxyHandler { } else { // before we came here we deleted a cache entry response.setContent(cacheArray); - Cache.store(response.url(), response.getResponseHeader(), cacheArray); - sb.toIndexer(response); + try { + Cache.store(response.url(), response.getResponseHeader(), cacheArray); + sb.toIndexer(response); + } catch (IOException e) { + log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e); + } conProp.setProperty(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS"); } } else { // no caching - if (theLogger.isFine()) theLogger.logFine(reqID +" "+ url.toString() + " not cached." + + if (log.isFine()) log.logFine(reqID +" "+ url.toString() + " not cached." + " StoreError=" + ((storeError==null)?"None":storeError) + " StoreHTCache=" + storeHTCache + " SupportError=" + supportError); @@ -687,7 +695,7 @@ public final class HTTPDProxyHandler { if (requestHeader.containsKey(RequestHeader.IF_MODIFIED_SINCE)) { // conditional request: freshness of cache for that condition was already // checked within shallUseCache(). Now send only a 304 response - theLogger.logInfo("CACHE HIT/304 " + url.toString()); + log.logInfo("CACHE HIT/304 " + url.toString()); conProp.setProperty(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_HIT"); // setting the content length header to 0 @@ -698,7 +706,7 @@ public final class HTTPDProxyHandler { //respondHeader(respond, "304 OK", cachedResponseHeader); // respond with 'not modified' } else { // unconditional request: send content of cache - theLogger.logInfo("CACHE HIT/203 " + url.toString()); + log.logInfo("CACHE HIT/203 " + url.toString()); conProp.setProperty(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_HIT"); // setting the content header to the proper length @@ -722,7 +730,7 @@ public final class HTTPDProxyHandler { // this happens if the client stops loading the file // we do nothing here if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { - theLogger.logWarning("Error while trying to send cached message body."); + log.logWarning("Error while trying to send cached message body."); conProp.setProperty(HeaderFramework.CONNECTION_PROP_PERSISTENT,"close"); } else { HTTPDemon.sendRespondError(conProp,respond,4,503,"socket error: " + e.getMessage(),"socket error: " + e.getMessage(), e); @@ -766,12 +774,12 @@ public final class HTTPDProxyHandler { } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; - theLogger.logSevere(errorMsg); + log.logSevere(errorMsg); HTTPDemon.sendRespondError(conProp,respond,4,501,null,errorMsg,e); return; } - if (theLogger.isFine()) theLogger.logFine(reqID +" HEAD "+ url); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" header: "+ requestHeader); + if (log.isFine()) log.logFine(reqID +" HEAD "+ url); + if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); // check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers final String hostlow = host.toLowerCase(); @@ -782,7 +790,7 @@ public final class HTTPDProxyHandler { if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) { HTTPDemon.sendRespondError(conProp,respond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); - theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); + log.logInfo("AGIS blocking of host '" + hostlow + "'"); return; } @@ -802,14 +810,14 @@ public final class HTTPDProxyHandler { // generate request-url final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; - if (theLogger.isFinest()) theLogger.logFinest(reqID +" using url: "+ getUrl); + if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl); final Client client = setupHttpClient(requestHeader, connectHost); // send request try { res = client.HEAD(getUrl); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" response status: "+ res.getStatusLine()); + if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine()); // determine if it's an internal error of the httpc final ResponseHeader responseHeader = res.getResponseHeader(); @@ -820,7 +828,7 @@ public final class HTTPDProxyHandler { prepareResponseHeader(responseHeader, res.getHttpVer()); // sending the server respond back to the client - if (theLogger.isFinest()) theLogger.logFinest(reqID +" sending response header: "+ responseHeader); + if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); HTTPDemon.sendRespondHeader(conProp,respond,httpVer,res.getStatusCode(),res.getStatusLine().substring(4),responseHeader); respond.flush(); } finally { @@ -869,12 +877,12 @@ public final class HTTPDProxyHandler { } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; - theLogger.logSevere(errorMsg); + log.logSevere(errorMsg); HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); return; } - if (theLogger.isFine()) theLogger.logFine(reqID +" POST "+ url); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" header: "+ requestHeader); + if (log.isFine()) log.logFine(reqID +" POST "+ url); + if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); prepareRequestHeader(conProp, requestHeader, host.toLowerCase()); @@ -893,13 +901,13 @@ public final class HTTPDProxyHandler { final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; - if (theLogger.isFinest()) theLogger.logFinest(reqID +" using url: "+ getUrl); + if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl); final Client client = setupHttpClient(requestHeader, connectHost); // check input if(body == null) { - theLogger.logSevere("no body to POST!"); + log.logSevere("no body to POST!"); } // from old httpc: // "if there is a body to the call, we would have a CONTENT-LENGTH tag in the requestHeader" @@ -925,7 +933,7 @@ public final class HTTPDProxyHandler { try { // sending the request res = client.POST(getUrl, body); - if (theLogger.isFinest()) theLogger.logFinest(reqID +" response status: "+ res.getStatusLine()); + if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine()); final ResponseHeader responseHeader = res.getResponseHeader(); // determine if it's an internal error of the httpc @@ -943,7 +951,7 @@ public final class HTTPDProxyHandler { } // sending response headers - if (theLogger.isFinest()) theLogger.logFinest(reqID +" sending response header: "+ responseHeader); + if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); HTTPDemon.sendRespondHeader(conProp, countedRespond, httpVer, @@ -1230,7 +1238,7 @@ public final class HTTPDProxyHandler { if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { HTTPDemon.sendRespondError(conProp,clientOut,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); - theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); + log.logInfo("AGIS blocking of host '" + hostlow + "'"); forceConnectionClose(conProp); return; } @@ -1249,7 +1257,7 @@ public final class HTTPDProxyHandler { try { response = remoteProxy.CONNECT(host, port); // outputs a logline to the serverlog with the current status - theLogger.logInfo("CONNECT-RESPONSE: status=" + response.getStatusLine() + ", header=" + response.getResponseHeader().toString()); + log.logInfo("CONNECT-RESPONSE: status=" + response.getStatusLine() + ", header=" + response.getResponseHeader().toString()); // (response.getStatusLine().charAt(0) == '2') || (response.getStatusLine().charAt(0) == '3') final boolean success = response.getStatusCode() >= 200 && response.getStatusCode() <= 399; if (success) { @@ -1289,7 +1297,7 @@ public final class HTTPDProxyHandler { "Proxy-agent: YACY" + serverCore.CRLF_STRING + serverCore.CRLF_STRING).getBytes()); - theLogger.logInfo("SSL connection to " + host + ":" + port + " established."); + log.logInfo("SSL connection to " + host + ":" + port + " established."); // start stream passing with mediate processes final Mediate cs = new Mediate(sslSocket, clientIn, promiscuousOut); @@ -1398,7 +1406,7 @@ public final class HTTPDProxyHandler { final String exceptionMsg = e.getMessage(); if ((exceptionMsg != null) && (exceptionMsg.indexOf("Corrupt GZIP trailer") >= 0)) { // just do nothing, we leave it this way - if (theLogger.isFine()) theLogger.logFine("ignoring bad gzip trail for URL " + url + " (" + e.getMessage() + ")"); + if (log.isFine()) log.logFine("ignoring bad gzip trail for URL " + url + " (" + e.getMessage() + ")"); forceConnectionClose(conProp); } else if ((exceptionMsg != null) && (exceptionMsg.indexOf("Connection reset")>= 0)) { errorMessage = "Connection reset"; @@ -1436,12 +1444,12 @@ public final class HTTPDProxyHandler { } } else { if (unknownError) { - theLogger.logSevere("Unknown Error while processing request '" + + log.logSevere("Unknown Error while processing request '" + conProp.getProperty(HeaderFramework.CONNECTION_PROP_REQUESTLINE,"unknown") + "':" + "\n" + Thread.currentThread().getName() + "\n" + errorMessage,e); } else { - theLogger.logWarning("Error while processing request '" + + log.logWarning("Error while processing request '" + conProp.getProperty(HeaderFramework.CONNECTION_PROP_REQUESTLINE,"unknown") + "':" + "\n" + Thread.currentThread().getName() + "\n" + errorMessage); diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 5b2fd5cd2..c39400a49 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1582,10 +1582,24 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi // PARSE CONTENT final long parsingStartTime = System.currentTimeMillis(); - + byte[] b = null; + try { + // fetch the document + b = Cache.getContent(entry.url()); + if (b == null) { + this.log.logWarning("the resource '" + entry.url() + "' is missing in the cache."); + addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.name(), "missing"); + return null; + } + } catch (IOException e) { + this.log.logWarning("Unable fetch the resource '" + entry.url() + "'. from the cache: " + e.getMessage()); + addURLtoErrorDB(entry.url(), entry.referrerHash(), entry.initiator(), entry.name(), e.getMessage()); + return null; + } + try { // parse the document - document = Parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), Cache.getContent(entry.url())); + document = Parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), b); assert(document != null) : "Unexpected error. Parser returned null."; } catch (final ParserException e) { this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage()); diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 0f91f88a6..6c98d9f0f 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -853,10 +853,10 @@ public final class serverCore extends serverAbstractBusyThread implements server if (bufferSize == 0 && b == -1) return null; return readLineBuffer.getBytes(); } catch (final ClosedByInterruptException e) { - if (logerr) Log.logSevere("SERVER", "receive interrupted - timeout"); + if (logerr) Log.logWarning("SERVER", "receive interrupted"); return null; } catch (final IOException e) { - if (logerr) Log.logSevere("SERVER", "receive interrupted - exception 2 = " + e.getMessage()); + if (logerr) Log.logWarning("SERVER", "receive closed by IOException: " + e.getMessage()); return null; } finally { try { diff --git a/source/de/anomic/ymage/ymageOSM.java b/source/de/anomic/ymage/ymageOSM.java index d1425f1ce..27ef00ede 100644 --- a/source/de/anomic/ymage/ymageOSM.java +++ b/source/de/anomic/ymage/ymageOSM.java @@ -79,7 +79,12 @@ public class ymageOSM { return null; } //System.out.println("*** DEBUG: fetching OSM tile: " + tileURL.toNormalform(true, true)); - InputStream tileStream = Cache.getContentStream(tileURL); + InputStream tileStream = null; + try { + tileStream = Cache.getContentStream(tileURL); + } catch (IOException e1) { + e1.printStackTrace(); + } if (tileStream == null) { // download resource using the crawler and keep resource in memory if possible Response entry = null;