From 2f0d7ea8d3b5aacf206c73a5a2b887bd4e007008 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 9 Jul 2005 00:33:34 +0000 Subject: [PATCH] removed htcache stati (superfluous now) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@396 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/http/httpc.java | 7 ++++ source/de/anomic/http/httpdProxyHandler.java | 24 ++++++------- .../de/anomic/plasma/plasmaCrawlWorker.java | 8 ++--- source/de/anomic/plasma/plasmaHTCache.java | 34 ++++++++++++------- .../anomic/plasma/plasmaWordIndexCache.java | 2 +- 5 files changed, 45 insertions(+), 30 deletions(-) diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 736802cd7..ee54e1701 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -499,6 +499,13 @@ public final class httpc { return ((status.charAt(0) == '2') || (status.charAt(0) == '3')); } + public byte[] writeContent() throws IOException { + int contentLength = (int) this.responseHeader.contentLength(); + serverByteBuffer sbb = new serverByteBuffer((contentLength==-1)?8192:contentLength); + writeContentX(null, sbb, httpc.this.clientInput); + return sbb.getBytes(); + } + public byte[] writeContent(OutputStream procOS) throws IOException { int contentLength = (int) this.responseHeader.contentLength(); serverByteBuffer sbb = new serverByteBuffer((contentLength==-1)?8192:contentLength); diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 08f99162a..aada07309 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -618,19 +618,19 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt if (sizeBeforeDelete == -1) { // totally fresh file - cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert + //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert cacheEntry.cacheArray = cacheArray; cacheManager.push(cacheEntry); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } else if (sizeBeforeDelete == cacheArray.length) { // before we came here we deleted a cache entry cacheArray = null; - cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; - cacheManager.push(cacheEntry); // unnecessary update + //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; + //cacheManager.push(cacheEntry); // unnecessary update conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT"); } else { // before we came here we deleted a cache entry - cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; + //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; cacheEntry.cacheArray = cacheArray; cacheManager.push(cacheEntry); // necessary update, write response header to cache conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); @@ -644,17 +644,17 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt this.theLogger.logDebug("for write-file of " + url + ": contentLength = " + contentLength + ", sizeBeforeDelete = " + sizeBeforeDelete); if (sizeBeforeDelete == -1) { // totally fresh file - cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert + //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert cacheManager.push(cacheEntry); conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } else if (sizeBeforeDelete == cacheFile.length()) { // before we came here we deleted a cache entry - cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; - cacheManager.push(cacheEntry); // unnecessary update + //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; + //cacheManager.push(cacheEntry); // unnecessary update conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REF_FAIL_HIT"); } else { // before we came here we deleted a cache entry - cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; + //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; cacheManager.push(cacheEntry); // necessary update, write response header to cache conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); } @@ -668,12 +668,12 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); if (sizeBeforeDelete == -1) { // no old file and no load. just data passing - cacheEntry.status = plasmaHTCache.CACHE_PASSING; - cacheManager.push(cacheEntry); + //cacheEntry.status = plasmaHTCache.CACHE_PASSING; + //cacheManager.push(cacheEntry); } else { // before we came here we deleted a cache entry - cacheEntry.status = plasmaHTCache.CACHE_STALE_NO_RELOAD; - cacheManager.push(cacheEntry); + //cacheEntry.status = plasmaHTCache.CACHE_STALE_NO_RELOAD; + //cacheManager.push(cacheEntry); } conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index 3596d614e..f0274a74c 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -324,11 +324,11 @@ public final class plasmaCrawlWorker extends Thread { File cacheFile = cacheManager.getCachePath(url); try { String error = null; - if (!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) { + if ((!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) && + (!(plasmaParser.supportedFileExt(url)))) { // if the response has not the right file type then reject file remote.close(); - log.logInfo("REJECTED WRONG MIME TYPE " + res.responseHeader.mime() + " for url " + url.toString()); - htCache.status = plasmaHTCache.CACHE_UNFILLED; + log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for url " + url.toString()); } else { // we write the new cache entry to file system directly cacheFile.getParentFile().mkdirs(); @@ -337,11 +337,11 @@ public final class plasmaCrawlWorker extends Thread { fos = new FileOutputStream(cacheFile); res.writeContent(fos); // superfluous write to array htCache.cacheArray = null; + cacheManager.writeFileAnnouncement(cacheFile); //htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file } finally { if (fos!=null)try{fos.close();}catch(Exception e){} } - htCache.status = plasmaHTCache.CACHE_FILL; } // enQueue new entry with response header if (profile != null) { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 49dc6e5d4..ffca5e78a 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -86,6 +86,7 @@ public final class plasmaHTCache { public final File cachePath; public static serverLog log; + /* public static final int CACHE_UNFILLED = 0; // default case without assignment public static final int CACHE_FILL = 1; // this means: update == true public static final int CACHE_HIT = 2; // the best case: reading from Cache @@ -93,7 +94,8 @@ public final class plasmaHTCache { public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content public static final int CACHE_PASSING = 6; // does not touch cache, just passing - + */ + public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) { //this.switchboard = switchboard; @@ -156,8 +158,7 @@ public final class plasmaHTCache { responseHeaderDB.set(urlHash, responseHeader); } - public boolean deleteFile(URL url) { - File file = getCachePath(url); + private boolean deleteFile(File file) { if (file.exists()) { currCacheSize -= file.length(); return file.delete(); @@ -166,18 +167,17 @@ public final class plasmaHTCache { } } + public boolean deleteFile(URL url) { + return deleteFile(getCachePath(url)); + } + public boolean writeFile(URL url, byte[] array) { if (array == null) return false; + File file = getCachePath(url); try { - File file = getCachePath(url); - if (file.exists()) { - currCacheSize -= file.length(); - file.delete(); - } + deleteFile(file); file.getParentFile().mkdirs(); serverFileUtils.write(array, file); - currCacheSize += file.length(); - cacheAge.put(ageString(file.lastModified(), file), file); } catch (FileNotFoundException e) { // this is the case of a "(Not a directory)" error, which should be prohibited // by the shallStoreCache() property. However, sometimes the error still occurs @@ -188,10 +188,18 @@ public final class plasmaHTCache { log.logError("File storage failed (IO error): " + e.getMessage()); return false; } - cleanup(); + writeFileAnnouncement(file); return true; } + public void writeFileAnnouncement(File file) { + if (file.exists()) { + currCacheSize += file.length(); + cacheAge.put(ageString(file.lastModified(), file), file); + cleanup(); + } + } + private void cleanup() { // clean up cache to have enough space for next entries File f; @@ -509,7 +517,6 @@ public final class plasmaHTCache { // to be defined later: this.cacheArray = null; - this.status = CACHE_UNFILLED; } public String name() { @@ -534,10 +541,11 @@ public final class plasmaHTCache { } } + /* public boolean update() { return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD)); } - + */ // the following three methods for cache read/write granting shall be as loose as possible // but also as strict as necessary to enable caching of most items diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index df8112c9e..37c8712e7 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -216,7 +216,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { addEntry(wordHash, wordEntry, creationTime); urlCount++; // protect against memory shortage - while (rt.freeMemory() < 1000000) flushFromMem(); + while (rt.freeMemory() < 1000000) {flushFromMem(); java.lang.System.gc();} // write a log if (System.currentTimeMillis() > messageTime) { System.gc(); // for better statistic