diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 56292465b..9fb6becf3 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -370,7 +370,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt File cacheFile = cacheManager.getCachePath(url); String urlHash = plasmaURL.urlHash(url); httpHeader cachedResponseHeader = cacheManager.getCachedResponse(urlHash); - boolean cacheExists = ((cacheFile.exists()) && (cacheFile.isFile()) && (cachedResponseHeader != null)); + boolean cacheExists = ((cacheFile.isFile()) && (cachedResponseHeader != null)); // why are files unzipped upon arrival? why not zip all files in cache? // This follows from the following premises @@ -519,10 +519,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt // the cache does either not exist or is (supposed to be) stale long sizeBeforeDelete = -1; - if ((cacheFile.exists()) && (cacheFile.isFile()) && (cachedResponseHeader != null)) { + if ((cacheFile.isFile()) && (cachedResponseHeader != null)) { // delete the cache sizeBeforeDelete = cacheFile.length(); - cacheManager.deleteFile(url); + cacheFile.delete(); + //cacheManager.deleteFile(url); // Hermes conProp.setProperty(httpd.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); } diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index 49a4248c0..ebfd775d9 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -353,9 +353,9 @@ public final class plasmaCrawlWorker extends Thread { remote.close(); log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString()); } else { - if (cacheFile.isFile()) { - cacheManager.deleteFile(url); - } + //if (cacheFile.isFile()) { // Hermes + // cacheManager.deleteFile(url); + //} // we write the new cache entry to file system directly cacheFile.getParentFile().mkdirs(); FileOutputStream fos = null; diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 10221bafb..44911f00f 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -157,6 +157,38 @@ public final class plasmaHTCache { responseHeaderDB.set(urlHash, responseHeader); } + public boolean writeFile(URL url, byte[] array) { + if (array == null) return false; + File file = getCachePath(url); + try { + deleteFile(file); + file.getParentFile().mkdirs(); + serverFileUtils.write(array, file); + } catch (FileNotFoundException e) { + // this is the case of a "(Not a directory)" error, which should be prohibited + // by the shallStoreCache() property. However, sometimes the error still occurs + // In this case do nothing. + log.logSevere("File storage failed (not a directory): " + e.getMessage()); + return false; + } catch (IOException e) { + log.logSevere("File storage failed (IO error): " + e.getMessage()); + return false; + } + writeFileAnnouncement(file); + return true; + } + + public void writeFileAnnouncement(File file) { + synchronized (cacheAge) { + if (file.exists()) { + currCacheSize += file.length(); + cacheAge.put(ageString(file.lastModified(), file), file); + cleanup(); + } + } + } + +/* private boolean deleteFile(File file) { if (file.exists()) { long size = file.length(); @@ -183,7 +215,7 @@ public final class plasmaHTCache { return false; } } - + private boolean deleteURLfromCache (URL url, String msg) { if (deleteFileandDirs(getCachePath(url), msg)) { try { @@ -197,43 +229,7 @@ public final class plasmaHTCache { } else { return false; } - } - - public boolean deleteFile(URL url) { - return deleteURLfromCache(url, "FROM"); - } - - public boolean writeFile(URL url, byte[] array) { - if (array == null) return false; - File file = getCachePath(url); - try { - deleteFile(file); - file.getParentFile().mkdirs(); - serverFileUtils.write(array, file); - } catch (FileNotFoundException e) { - // this is the case of a "(Not a directory)" error, which should be prohibited - // by the shallStoreCache() property. However, sometimes the error still occurs - // In this case do nothing. - log.logSevere("File storage failed (not a directory): " + e.getMessage()); - return false; - } catch (IOException e) { - log.logSevere("File storage failed (IO error): " + e.getMessage()); - return false; - } - writeFileAnnouncement(file); - return true; - } - - public void writeFileAnnouncement(File file) { - synchronized (cacheAge) { - if (file.exists()) { - currCacheSize += file.length(); - cacheAge.put(ageString(file.lastModified(), file), file); - cleanup(); - } - } - } - + } private void cleanupDoIt(long newCacheSize) { File f; while ((currCacheSize >= newCacheSize) && (cacheAge.size() > 0)) { @@ -254,11 +250,52 @@ public final class plasmaHTCache { } } } +*/ + private boolean deleteFile(File file) { + long size = file.length(); + if (file.exists()) { + currCacheSize -= size; + return file.delete(); + } else { + return false; + } + } + + public boolean deleteFile(URL url) { + return deleteFile(getCachePath(url)); + } + + private void cleanupCache(long newCacheSize) { + File object; + long size; + while (currCacheSize > maxCacheSize && cacheAge.size() > 0) { + object = (File) cacheAge.remove(cacheAge.firstKey()); + if (object != null) { + size = object.length(); + if (object.isFile() && object.delete()) { + currCacheSize -= size; + log.logInfo("DELETED OLD CACHE: " + object.toString()); + object = object.getParentFile(); + if (object.isDirectory() && object.list().length == 0) { + if (object.delete()) { + try { + log.logInfo("DELETED EMPTY DIRECTORY: " + object.toString()); + responseHeaderDB.remove(plasmaURL.urlHash(getURL(cachePath , object))); + } catch (IOException e) { + log.logWarning("HTCACHE: IOExeption removing response header from DB: " + e.getMessage()); + } + } + } + } + } + } + } + private void cleanup() { - // clean up cache to have 3% (enough) space for next entries + // clean up cache to have 4% (enough) space for next entries if ((currCacheSize >= maxCacheSize) && (cacheAge.size() > 0)) { - if (maxCacheSize > 0) cleanupDoIt(maxCacheSize - ((maxCacheSize / 100) * 3)); + if (maxCacheSize > 0) cleanupCache(maxCacheSize - ((maxCacheSize / 100) * 4)); } }