diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 4730f5ae3..916d924ba 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -625,8 +625,21 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt res.statusText, res.responseHeader); - String storeError; - if ((storeError = cacheEntry.shallStoreCacheForProxy()) == null) { + String storeError = cacheEntry.shallStoreCacheForProxy(); + boolean storeHTCache = cacheEntry.profile.storeHTCache(); + boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url,cacheEntry.responseHeader.mime()); + if ( + /* + * Now we store the response into the htcache directory if + * a) the response is cacheable AND + */ + (storeError == null) && + /* + * b) the user has configured to use the htcache OR + * c) the content should be indexed + */ + ((storeHTCache) || (isSupportedContent)) + ) { // we write a new cache entry if ((contentLength > 0) && (contentLength < 1048576)) // if the length is known and < 1 MB { @@ -684,7 +697,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt } } else { // no caching - this.theLogger.logFine(cacheFile.toString() + " not cached: " + storeError); + this.theLogger.logFine(cacheFile.toString() + " not cached." + + " StoreError=" + ((storeError==null)?"None":storeError) + + " StoreHTCache=" + storeHTCache + + " SupportetContent=" + isSupportedContent); + res.writeContent(hfos, null); if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); if (sizeBeforeDelete == -1) { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 37f06ffd7..c2f76e8cd 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -785,8 +785,8 @@ public final class plasmaHTCache { // returns NULL if the answer is TRUE // in case of FALSE, the reason as String is returned - // check profile - if (!this.profile.storeHTCache()) { return "storage_not_wanted"; } + // check profile (disabled: we will check this in the plasmaSwitchboard) + //if (!this.profile.storeHTCache()) { return "storage_not_wanted"; } // decide upon header information if a specific file should be stored to the cache or not // if the storage was requested by prefetching, the request map is null diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0ff87ce40..90bcab158 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -743,19 +743,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser synchronized public boolean htEntryStoreProcess(plasmaHTCache.Entry entry) throws IOException { if (entry == null) return false; - - // store response header - if (entry.responseHeader != null) { - this.cacheManager.storeHeader(entry.nomalizedURLHash, entry.responseHeader); - this.log.logInfo("WROTE HEADER for " + entry.cacheFile); - } - - /* - * Evaluating request header: + + /* ========================================================================= + * PARSER SUPPORT + * + * Testing if the content type is supported by the available parsers + * ========================================================================= */ + boolean isSupportedContent = (entry.responseHeader != null) && + plasmaParser.supportedContent(entry.url,entry.responseHeader.mime()); + + /* ========================================================================= + * INDEX CONTROL HEADER + * * With the X-YACY-Index-Control header set to "no-index" a client could disallow * yacy to index the response returned as answer to a request - */ - boolean doIndexing = true; + * ========================================================================= */ + boolean doIndexing = true; if (entry.requestHeader != null) { if ( (entry.requestHeader.containsKey(httpHeader.X_YACY_INDEX_CONTROL)) && @@ -763,9 +766,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser ) { doIndexing = false; } - } + } - // check if ip is local ip address + /* ========================================================================= + * LOCAL IP ADDRESS CHECK + * + * check if ip is local ip address + * ========================================================================= */ InetAddress hostAddress = httpc.dnsResolve(entry.url.getHost()); if (hostAddress == null) { this.log.logFine("Unknown host in URL '" + entry.url + "'. Will not be indexed."); @@ -778,20 +785,41 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser doIndexing = false; } - // work off unwritten files - if (entry.cacheArray == null) { - this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile); - } else { - String error = entry.shallStoreCacheForProxy(); - if (error == null) { - this.cacheManager.writeFile(entry.url, entry.cacheArray); - this.log.logFine("WROTE FILE (" + entry.cacheArray.length + " bytes) for " + entry.cacheFile); + /* ========================================================================= + * STORING DATA + * + * Now we store the response header and response content if + * a) the user has configured to use the htcache or + * b) the content should be indexed + * ========================================================================= */ + if ( + (entry.profile.storeHTCache()) || + (doIndexing && isSupportedContent) + ) { + // store response header + if (entry.responseHeader != null) { + this.cacheManager.storeHeader(entry.nomalizedURLHash, entry.responseHeader); + this.log.logInfo("WROTE HEADER for " + entry.cacheFile); + } + + // work off unwritten files + if (entry.cacheArray == null) { + this.log.logFine("EXISTING FILE (" + entry.cacheFile.length() + " bytes) for " + entry.cacheFile); } else { - this.log.logFine("WRITE OF FILE " + entry.cacheFile + " FORBIDDEN: " + error); + String error = entry.shallStoreCacheForProxy(); + if (error == null) { + this.cacheManager.writeFile(entry.url, entry.cacheArray); + this.log.logFine("WROTE FILE (" + entry.cacheArray.length + " bytes) for " + entry.cacheFile); + } else { + this.log.logFine("WRITE OF FILE " + entry.cacheFile + " FORBIDDEN: " + error); + } } } - if ((doIndexing) && plasmaParser.supportedContent(entry.url,entry.responseHeader.mime())){ + /* ========================================================================= + * INDEXING + * ========================================================================= */ + if (doIndexing && isSupportedContent){ // registering the cachefile as in use if (entry.cacheFile.exists()) { @@ -804,6 +832,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser entry.initiator(), entry.depth, entry.profile.handle(), entry.name() )); + } else { + if (!entry.profile.storeHTCache() && entry.cacheFile.exists()) { + this.cacheManager.deleteFile(entry.url); + } } return true;