From b5346141b3df845d50b8e0569ce39d806e0e436d Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 15 Aug 2007 21:31:31 +0000 Subject: [PATCH] made the plasmaHTCache static (there is only one internet, so we need only one cache) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4045 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/CacheAdmin_p.java | 8 +- htroot/CrawlResults.java | 5 +- htroot/IndexCreateIndexingQueue_p.java | 3 +- htroot/ViewFile.java | 12 +- source/de/anomic/http/httpdProxyHandler.java | 23 +- source/de/anomic/icap/icapd.java | 10 +- .../plasma/crawler/AbstractCrawlWorker.java | 9 +- .../plasma/crawler/ftp/CrawlWorker.java | 14 +- .../plasma/crawler/http/CrawlWorker.java | 20 +- .../plasma/crawler/plasmaCrawlerFactory.java | 6 - .../de/anomic/plasma/plasmaCrawlLoader.java | 9 +- source/de/anomic/plasma/plasmaHTCache.java | 263 +++++++++--------- .../de/anomic/plasma/plasmaSnippetCache.java | 33 +-- .../de/anomic/plasma/plasmaSwitchboard.java | 33 +-- .../anomic/plasma/plasmaSwitchboardQueue.java | 8 +- .../de/anomic/server/serverInstantThread.java | 11 +- source/de/anomic/yacy/yacySeedDB.java | 3 +- 17 files changed, 214 insertions(+), 256 deletions(-) diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java index 23896add7..279c8231d 100644 --- a/htroot/CacheAdmin_p.java +++ b/htroot/CacheAdmin_p.java @@ -118,7 +118,7 @@ public class CacheAdmin_p { final StringBuffer tree = new StringBuffer(); final StringBuffer info = new StringBuffer(); - final URL url = switchboard.cacheManager.getURL(file); + final URL url = plasmaHTCache.getURL(file); String urlstr = ""; @@ -132,7 +132,7 @@ public class CacheAdmin_p { info.ensureCapacity(10000); try { - final IResourceInfo resInfo = switchboard.cacheManager.loadResourceInfo(url); + final IResourceInfo resInfo = plasmaHTCache.loadResourceInfo(url); if (resInfo == null) { prop.put("info_type", NotCached); } else { @@ -247,8 +247,8 @@ public class CacheAdmin_p { } } - prop.put("cachesize", Long.toString(switchboard.cacheManager.curCacheSize/1024)); - prop.put("cachemax", Long.toString(switchboard.cacheManager.maxCacheSize/1024)); + prop.put("cachesize", Long.toString(plasmaHTCache.curCacheSize/1024)); + prop.put("cachemax", Long.toString(plasmaHTCache.maxCacheSize/1024)); prop.put("path", path.toString()); prop.put("info_info", info.toString()); diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 177f8fbe4..29e4e676d 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -154,9 +154,6 @@ public class CrawlResults { yacySeed initiatorSeed, executorSeed; indexURLEntry urle; - // needed for getCachePath(url) - final plasmaHTCache cacheManager = sb.getCacheManager(); - int i, cnt = 0; for (i = sb.wordIndex.loadedURL.getStackSize(tabletype) - 1; i >= (sb.wordIndex.loadedURL.getStackSize(tabletype) - lines); i--) { initiatorHash = sb.wordIndex.loadedURL.getInitiatorHash(tabletype, i); @@ -173,7 +170,7 @@ public class CrawlResults { urlstr = comp.url().toNormalform(false, true); urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL - cachepath = cacheManager.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1); + cachepath = plasmaHTCache.getCachePath(new URL(urlstr)).toString().replace('\\', '/').substring(plasmaHTCache.cachePath.toString().length() + 1); prop.put("table_indexed_" + cnt + "_dark", (dark) ? 1 : 0); if (showControl) { diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index bc7df5b26..aadc0d555 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -52,6 +52,7 @@ import de.anomic.data.htmlTools; import de.anomic.http.httpHeader; import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlZURL; +import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboardQueue; import de.anomic.server.serverObjects; @@ -88,7 +89,7 @@ public class IndexCreateIndexingQueue_p { plasmaSwitchboardQueue.Entry entry = null; while ((entry = switchboard.sbQueue.pop()) != null) { if ((entry != null) && (entry.profile() != null) && (!(entry.profile().storeHTCache()))) { - switchboard.cacheManager.deleteFile(entry.url()); + plasmaHTCache.deleteFile(entry.url()); } } } diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index a0cec7647..94c5c1557 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -162,8 +162,8 @@ public class ViewFile { String resMime = null; try { // trying to load the resource body - resource = sb.cacheManager.getResourceContentStream(url); - resourceLength = sb.cacheManager.getResourceContentLength(url); + resource = plasmaHTCache.getResourceContentStream(url); + resourceLength = plasmaHTCache.getResourceContentLength(url); // if the resource body was not cached we try to load it from web if (resource == null) { @@ -179,8 +179,8 @@ public class ViewFile { if (entry != null) { resInfo = entry.getDocumentInfo(); - resource = sb.cacheManager.getResourceContentStream(url); - resourceLength = sb.cacheManager.getResourceContentLength(url); + resource = plasmaHTCache.getResourceContentStream(url); + resourceLength = plasmaHTCache.getResourceContentLength(url); } if (resource == null) { @@ -196,7 +196,7 @@ public class ViewFile { // try to load the metadata from cache try { - resInfo = sb.cacheManager.loadResourceInfo(url); + resInfo = plasmaHTCache.loadResourceInfo(url); } catch (Exception e) { /* ignore this */ } @@ -218,7 +218,7 @@ public class ViewFile { return prop; } try { - resInfo = sb.cacheManager.getResourceInfoFactory().buildResourceInfoObj(url, responseHeader); + resInfo = plasmaHTCache.getResourceInfoFactory().buildResourceInfoObj(url, responseHeader); } catch (Exception e) { prop.put("error", 4); prop.put("error_errorText", e.getMessage()); diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 26f2b45e9..a244aa454 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -113,7 +113,6 @@ public final class httpdProxyHandler { // static variables // can only be instantiated upon first instantiation of this class object private static plasmaSwitchboard switchboard = null; - private static plasmaHTCache cacheManager = null; public static HashSet yellowList = null; private static int timeout = 30000; private static boolean yacyTrigger = true; @@ -185,8 +184,6 @@ public final class httpdProxyHandler { // creating a logger theLogger = new serverLog("PROXY"); - - cacheManager = switchboard.getCacheManager(); isTransparentProxy = Boolean.valueOf(switchboard.getConfig("isTransparentProxy","false")).booleanValue(); @@ -392,10 +389,10 @@ public final class httpdProxyHandler { } // decide wether to use a cache entry or connect to the network - File cacheFile = cacheManager.getCachePath(url); + File cacheFile = plasmaHTCache.getCachePath(url); httpHeader cachedResponseHeader = null; - ResourceInfo cachedResInfo = (ResourceInfo) cacheManager.loadResourceInfo(url); + ResourceInfo cachedResInfo = (ResourceInfo) plasmaHTCache.loadResourceInfo(url); if (cachedResInfo != null) { // set the new request header (needed by function shallUseCacheForProxy) cachedResInfo.setRequestHeader(requestHeader); @@ -428,7 +425,7 @@ public final class httpdProxyHandler { // in two of these cases we trigger a scheduler to handle newly arrived files: // case 1 and case 3 plasmaHTCache.Entry cacheEntry = (cachedResponseHeader == null) ? null : - cacheManager.newEntry( + plasmaHTCache.newEntry( requestDate, // init date 0, // crawling depth url, // url @@ -561,14 +558,14 @@ public final class httpdProxyHandler { if ((cacheFile.isFile()) && (cachedResponseHeader != null)) { // delete the cache sizeBeforeDelete = cacheFile.length(); - cacheManager.deleteFile(url); + plasmaHTCache.deleteFile(url); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); } // reserver cache entry Date requestDate = new Date(((Long)conProp.get(httpHeader.CONNECTION_PROP_REQUEST_START)).longValue()); IResourceInfo resInfo = new ResourceInfo(url,requestHeader,res.responseHeader); - plasmaHTCache.Entry cacheEntry = cacheManager.newEntry( + plasmaHTCache.Entry cacheEntry = plasmaHTCache.newEntry( requestDate, 0, url, @@ -648,7 +645,7 @@ public final class httpdProxyHandler { // totally fresh file //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert cacheEntry.setCacheArray(cacheArray); - cacheManager.push(cacheEntry); + plasmaHTCache.push(cacheEntry); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } else if (sizeBeforeDelete == cacheArray.length) { // before we came here we deleted a cache entry @@ -660,7 +657,7 @@ public final class httpdProxyHandler { // before we came here we deleted a cache entry //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; cacheEntry.setCacheArray(cacheArray); - cacheManager.push(cacheEntry); // necessary update, write response header to cache + plasmaHTCache.push(cacheEntry); // necessary update, write response header to cache conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); } } else { @@ -670,11 +667,11 @@ public final class httpdProxyHandler { res.writeContent(hfos, cacheFile); if (hfos instanceof htmlFilterWriter) ((htmlFilterWriter) hfos).finalize(); theLogger.logFine("for write-file of " + url + ": contentLength = " + contentLength + ", sizeBeforeDelete = " + sizeBeforeDelete); - cacheManager.writeFileAnnouncement(cacheFile); + plasmaHTCache.writeFileAnnouncement(cacheFile); if (sizeBeforeDelete == -1) { // totally fresh file //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert - cacheManager.push(cacheEntry); + plasmaHTCache.push(cacheEntry); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); } else if (sizeBeforeDelete == cacheFile.length()) { // before we came here we deleted a cache entry @@ -684,7 +681,7 @@ public final class httpdProxyHandler { } else { // before we came here we deleted a cache entry //cacheEntry.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; - cacheManager.push(cacheEntry); // necessary update, write response header to cache + plasmaHTCache.push(cacheEntry); // necessary update, write response header to cache conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); } // beware! all these writings will not fill the cacheEntry.cacheArray diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java index 2837b1cd6..dc5e172b9 100644 --- a/source/de/anomic/icap/icapd.java +++ b/source/de/anomic/icap/icapd.java @@ -92,7 +92,6 @@ public class icapd implements serverHandler { private final serverLog log = new serverLog("ICAPD"); private static plasmaSwitchboard switchboard = null; - private static plasmaHTCache cacheManager = null; private static String virtualHost = null; private static boolean keepAliveSupport = true; @@ -101,7 +100,6 @@ public class icapd implements serverHandler { public icapd() { if (switchboard == null) { switchboard = plasmaSwitchboard.getSwitchboard(); - cacheManager = switchboard.cacheManager; virtualHost = switchboard.getConfig("fileHost","localhost"); } @@ -388,7 +386,7 @@ public class icapd implements serverHandler { // generating a htcache entry object IResourceInfo resInfo = new ResourceInfo(httpRequestURL,httpReqHeader,httpResHeader); - plasmaHTCache.Entry cacheEntry = cacheManager.newEntry( + plasmaHTCache.Entry cacheEntry = plasmaHTCache.newEntry( new Date(), 0, httpRequestURL, @@ -400,11 +398,11 @@ public class icapd implements serverHandler { ); // getting the filename/path to store the response body - File cacheFile = cacheManager.getCachePath(httpRequestURL); + File cacheFile = plasmaHTCache.getCachePath(httpRequestURL); // if the file already exits we delete it if (cacheFile.isFile()) { - cacheManager.deleteFile(httpRequestURL); + plasmaHTCache.deleteFile(httpRequestURL); } // we write the new cache entry to file system directly cacheFile.getParentFile().mkdirs(); @@ -414,7 +412,7 @@ public class icapd implements serverHandler { resBodyStream.close(); resBodyStream = null; // indexing the response - cacheManager.push(cacheEntry); + plasmaHTCache.push(cacheEntry); } catch (Exception e) { e.printStackTrace(); } diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index b926c501f..6c93df145 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -107,11 +107,6 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW */ protected final plasmaSwitchboard sb; - /** - * reference to the cache manager - */ - protected final plasmaHTCache cacheManager; - /** * Logging class */ @@ -130,14 +125,12 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW ThreadGroup theTG, plasmaCrawlerPool thePool, plasmaSwitchboard theSb, - plasmaHTCache theCacheManager, serverLog theLog ) { super(theTG,plasmaCrawlWorker.threadBaseName + "_created"); this.myPool = thePool; this.sb = theSb; - this.cacheManager = theCacheManager; this.log = theLog; } @@ -311,7 +304,7 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.sb.errorURL.stackPushEntry(ee); // delete the cache file - File cacheFile = this.cacheManager.getCachePath(this.url); + File cacheFile = plasmaHTCache.getCachePath(this.url); if (cacheFile.exists()) cacheFile.delete(); } } diff --git a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java index 8f1ae6710..ba5b8a60e 100644 --- a/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/ftp/CrawlWorker.java @@ -71,8 +71,8 @@ import de.anomic.server.logging.serverLog; public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorker { - public CrawlWorker(ThreadGroup theTG, plasmaCrawlerPool thePool, plasmaSwitchboard theSb, plasmaHTCache theCacheManager, serverLog theLog) { - super(theTG, thePool, theSb, theCacheManager, theLog); + public CrawlWorker(ThreadGroup theTG, plasmaCrawlerPool thePool, plasmaSwitchboard theSb, serverLog theLog) { + super(theTG, thePool, theSb, theLog); // this crawler supports ftp this.protocol = "ftp"; @@ -94,7 +94,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke fileDate ); - return this.cacheManager.newEntry( + return plasmaHTCache.newEntry( new Date(), this.depth, this.url, @@ -193,7 +193,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke } // creating a cache file object - File cacheFile = this.cacheManager.getCachePath(this.url); + File cacheFile = plasmaHTCache.getCachePath(this.url); // TODO: aborting download if content is to long ... @@ -202,7 +202,7 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke // testing if the file already exists if (cacheFile.isFile()) { // delete the file if it already exists - this.cacheManager.deleteFile(this.url); + plasmaHTCache.deleteFile(this.url); } else { // create parent directories cacheFile.getParentFile().mkdirs(); @@ -268,11 +268,11 @@ public class CrawlWorker extends AbstractCrawlWorker implements plasmaCrawlWorke if (cacheFile.exists()) cacheFile.delete(); } else { // announce the file - this.cacheManager.writeFileAnnouncement(cacheFile); + plasmaHTCache.writeFileAnnouncement(cacheFile); // enQueue new entry with response header if (this.profile != null) { - this.cacheManager.push(htCache); + plasmaHTCache.push(htCache); } } diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java index 10f9af9ec..9daca7726 100644 --- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java +++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java @@ -103,16 +103,14 @@ public final class CrawlWorker extends AbstractCrawlWorker { * @param theTG * @param thePool * @param theSb - * @param theCacheManager * @param theLog */ public CrawlWorker( ThreadGroup theTG, plasmaCrawlerPool thePool, plasmaSwitchboard theSb, - plasmaHTCache theCacheManager, serverLog theLog) { - super(theTG,thePool,theSb,theCacheManager,theLog); + super(theTG,thePool,theSb,theLog); // this crawler supports http this.protocol = "http"; @@ -144,7 +142,7 @@ public final class CrawlWorker extends AbstractCrawlWorker { protected plasmaHTCache.Entry createCacheEntry(URL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) { IResourceInfo resourceInfo = new ResourceInfo(requestUrl,requestHeader,response.responseHeader); - return this.cacheManager.newEntry( + return plasmaHTCache.newEntry( requestDate, this.depth, this.url, @@ -213,29 +211,29 @@ public final class CrawlWorker extends AbstractCrawlWorker { // aborting download if content is to long ... if (htCache.cacheFile().getAbsolutePath().length() > serverSystem.maxPathLength) { remote.close(); - this.log.logInfo("REJECTED URL " + this.url.toString() + " because path too long '" + this.cacheManager.cachePath.getAbsolutePath() + "'"); + this.log.logInfo("REJECTED URL " + this.url.toString() + " because path too long '" + plasmaHTCache.cachePath.getAbsolutePath() + "'"); addURLtoErrorDB(plasmaCrawlEURL.DENIED_CACHEFILE_PATH_TOO_LONG); return (htCache = null); } // reserve cache entry - if (!htCache.cacheFile().getCanonicalPath().startsWith(this.cacheManager.cachePath.getCanonicalPath())) { + if (!htCache.cacheFile().getCanonicalPath().startsWith(plasmaHTCache.cachePath.getCanonicalPath())) { // if the response has not the right file type then reject file remote.close(); this.log.logInfo("REJECTED URL " + this.url.toString() + " because of an invalid file path ('" + htCache.cacheFile().getCanonicalPath() + "' does not start with '" + - this.cacheManager.cachePath.getAbsolutePath() + "')."); + plasmaHTCache.cachePath.getAbsolutePath() + "')."); addURLtoErrorDB(plasmaCrawlEURL.DENIED_INVALID_CACHEFILE_PATH); return (htCache = null); } // request has been placed and result has been returned. work off response - File cacheFile = this.cacheManager.getCachePath(this.url); + File cacheFile = plasmaHTCache.getCachePath(this.url); try { if ((this.acceptAllContent) || (plasmaParser.supportedContent(plasmaParser.PARSER_MODE_CRAWLER,this.url,res.responseHeader.mime()))) { // delete old content if (cacheFile.isFile()) { - this.cacheManager.deleteFile(this.url); + plasmaHTCache.deleteFile(this.url); } // create parent directories @@ -275,14 +273,14 @@ public final class CrawlWorker extends AbstractCrawlWorker { byte[] cacheArray = null; cacheArray = res.writeContent(fos,this.keepInMemory); htCache.setCacheArray(cacheArray); - this.cacheManager.writeFileAnnouncement(cacheFile); + plasmaHTCache.writeFileAnnouncement(cacheFile); } finally { if (fos!=null)try{fos.close();}catch(Exception e){/* ignore this */} } // enQueue new entry with response header if (this.profile != null) { - this.cacheManager.push(htCache); + plasmaHTCache.push(htCache); } } else { // if the response has not the right file type then reject file diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java b/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java index 6f974957d..e56cb3e6f 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java @@ -50,7 +50,6 @@ import java.lang.reflect.Constructor; import org.apache.commons.pool.KeyedPoolableObjectFactory; -import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.logging.serverLog; @@ -58,14 +57,12 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { private plasmaCrawlerPool thePool; private final ThreadGroup theThreadGroup; - private final plasmaHTCache cacheManager; private final serverLog theLog; private final plasmaSwitchboard sb; public plasmaCrawlerFactory( ThreadGroup threadGroup, plasmaSwitchboard theSb, - plasmaHTCache theCacheManager, serverLog log ) { @@ -75,7 +72,6 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { throw new IllegalArgumentException("The threadgroup object must not be null."); this.theThreadGroup = threadGroup; - this.cacheManager = theCacheManager; this.sb = theSb; this.theLog = log; } @@ -106,7 +102,6 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { ThreadGroup.class, plasmaCrawlerPool.class, plasmaSwitchboard.class, - plasmaHTCache.class, serverLog.class } ); @@ -115,7 +110,6 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { this.theThreadGroup, (usePool)?this.thePool:null, this.sb, - this.cacheManager, this.theLog }); diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index 33bf5f78a..86545d1b2 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -61,8 +61,7 @@ import de.anomic.server.logging.serverLog; public final class plasmaCrawlLoader extends Thread { public static plasmaSwitchboard switchboard; - - private final plasmaHTCache cacheManager; + private final serverLog log; private HashSet supportedProtocols; @@ -73,13 +72,10 @@ public final class plasmaCrawlLoader extends Thread { private final ThreadGroup theThreadGroup = new ThreadGroup("CrawlerThreads"); private boolean stopped = false; - public plasmaCrawlLoader( - plasmaHTCache theCacheManager, - serverLog theLog) { + public plasmaCrawlLoader(serverLog theLog) { this.setName("plasmaCrawlLoader"); - this.cacheManager = theCacheManager; this.log = theLog; // supported protocols @@ -117,7 +113,6 @@ public final class plasmaCrawlLoader extends Thread { plasmaCrawlerFactory theFactory = new plasmaCrawlerFactory( this.theThreadGroup, switchboard, - this.cacheManager, this.log); this.crawlwerPool = new plasmaCrawlerPool(theFactory,this.crawlerPoolConfig,this.theThreadGroup); diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index 4476e6c0a..b79680b83 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -100,37 +100,34 @@ public final class plasmaHTCache { private static final int stackLimit = 150; // if we exceed that limit, we do not check idle public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day - kelondroMapObjects responseHeaderDB = null; - private final LinkedList cacheStack; - private final Map cacheAge; // a - relation - public long curCacheSize; - public long maxCacheSize; - public final File cachePath; - public final serverLog log; + private static kelondroMapObjects responseHeaderDB = null; + private static final LinkedList cacheStack = new LinkedList(); + private static final Map cacheAge = Collections.synchronizedMap(new TreeMap()); // a - relation + public static long curCacheSize = 0; + public static long maxCacheSize; + public static File cachePath; + public static final serverLog log = new serverLog("HTCACHE"); public static final HashSet filesInUse = new HashSet(); // can we delete this file - public String cacheLayout; - public boolean cacheMigration; + public static String cacheLayout; + public static boolean cacheMigration; - private ResourceInfoFactory objFactory; - private serverThread cacheScanThread; + private static ResourceInfoFactory objFactory = new ResourceInfoFactory(); + private static serverThread cacheScanThread; - public plasmaHTCache(File htCachePath, long maxCacheSize, long preloadTime, String cacheLayout, boolean cacheMigration) { - // this.switchboard = switchboard; - - this.log = new serverLog("HTCACHE"); - this.cachePath = htCachePath; - this.cacheLayout = cacheLayout; - this.cacheMigration = cacheMigration; + public static void init(File htCachePath, long CacheSizeMax, long preloadTime, String layout, boolean migration) { + + cachePath = htCachePath; + cacheLayout = layout; + cacheMigration = migration; + maxCacheSize = CacheSizeMax; - // create the object factory - this.objFactory = new ResourceInfoFactory(); // reset old HTCache ? - String[] list = this.cachePath.list(); + String[] list = cachePath.list(); if (list != null) { File object; for (int i = list.length - 1; i >= 0; i--) { - object = new File(this.cachePath, list[i]); + object = new File(cachePath, list[i]); if (!object.isDirectory()) { continue; } @@ -138,13 +135,13 @@ public final class plasmaHTCache { !object.getName().equals("yacy") && !object.getName().equals("https") && !object.getName().equals("ftp")) { - deleteOldHTCache(this.cachePath); + deleteOldHTCache(cachePath); break; } } } - File testpath = new File(this.cachePath, "/http/"); + File testpath = new File(cachePath, "/http/"); list = testpath.list(); if (list != null) { File object; @@ -156,7 +153,7 @@ public final class plasmaHTCache { if (!object.getName().equals("ip") && !object.getName().equals("other") && !object.getName().equals("www")) { - deleteOldHTCache(this.cachePath); + deleteOldHTCache(cachePath); break; } } @@ -170,40 +167,36 @@ public final class plasmaHTCache { } if (!htCachePath.isDirectory()) { // if the cache does not exists or is a file and not a directory, panic - this.log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created"); + log.logSevere("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created"); System.exit(0); } // open the response header database openResponseHeaderDB(preloadTime); - // init stack - this.cacheStack = new LinkedList(); - - // init cache age and size management - this.cacheAge = Collections.synchronizedMap(new TreeMap()); - this.curCacheSize = 0; - this.maxCacheSize = maxCacheSize; - // start the cache startup thread // this will collect information about the current cache size and elements - this.cacheScanThread = serverInstantThread.oneTimeJob(this, "cacheScan", this.log, 120000); + try { + cacheScanThread = serverInstantThread.oneTimeJob(Class.forName("de.anomic.plasma.plasmaHTCache"), "cacheScan", log, 120000); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } } - private void resetResponseHeaderDB() { - if (this.responseHeaderDB != null) this.responseHeaderDB.close(); - File dbfile = new File(this.cachePath, DB_NAME); + private static void resetResponseHeaderDB() { + if (responseHeaderDB != null) responseHeaderDB.close(); + File dbfile = new File(cachePath, DB_NAME); if (dbfile.exists()) dbfile.delete(); openResponseHeaderDB(0); } - private void openResponseHeaderDB(long preloadTime) { + private static void openResponseHeaderDB(long preloadTime) { // open the response header database - File dbfile = new File(this.cachePath, DB_NAME); - this.responseHeaderDB = new kelondroMapObjects(new kelondroDyn(dbfile, true, true, preloadTime, yacySeedDB.commonHashLength, 150, '#', kelondroBase64Order.enhancedCoder, true, false, true), 500); + File dbfile = new File(cachePath, DB_NAME); + responseHeaderDB = new kelondroMapObjects(new kelondroDyn(dbfile, true, true, preloadTime, yacySeedDB.commonHashLength, 150, '#', kelondroBase64Order.enhancedCoder, true, false, true), 500); } - private void deleteOldHTCache(File directory) { + private static void deleteOldHTCache(File directory) { String[] list = directory.list(); if (list != null) { File object; @@ -219,26 +212,26 @@ public final class plasmaHTCache { directory.delete(); } - public int size() { - synchronized (this.cacheStack) { - return this.cacheStack.size(); + public static int size() { + synchronized (cacheStack) { + return cacheStack.size(); } } - public int dbSize() { - return this.responseHeaderDB.size(); + public static int dbSize() { + return responseHeaderDB.size(); } - public void push(Entry entry) { - synchronized (this.cacheStack) { - this.cacheStack.add(entry); + public static void push(Entry entry) { + synchronized (cacheStack) { + cacheStack.add(entry); } } - public Entry pop() { - synchronized (this.cacheStack) { - if (this.cacheStack.size() > 0) - return (Entry) this.cacheStack.removeFirst(); + public static Entry pop() { + synchronized (cacheStack) { + if (cacheStack.size() > 0) + return (Entry) cacheStack.removeFirst(); return null; } } @@ -247,19 +240,19 @@ public final class plasmaHTCache { * This method changes the HTCache size.
* @param the new cache size in bytes */ - public void setCacheSize(long newCacheSize) { - this.maxCacheSize = newCacheSize; + public static void setCacheSize(long newCacheSize) { + maxCacheSize = newCacheSize; } /** * This method returns the free HTCache size.
* @return the cache size in bytes */ - public long getFreeSize() { - return (this.curCacheSize >= this.maxCacheSize) ? 0 : this.maxCacheSize - this.curCacheSize; + public static long getFreeSize() { + return (curCacheSize >= maxCacheSize) ? 0 : maxCacheSize - curCacheSize; } - public boolean writeResourceContent(URL url, byte[] array) { + public static boolean writeResourceContent(URL url, byte[] array) { if (array == null) return false; File file = getCachePath(url); try { @@ -270,69 +263,69 @@ public final class plasmaHTCache { // this is the case of a "(Not a directory)" error, which should be prohibited // by the shallStoreCache() property. However, sometimes the error still occurs // In this case do nothing. - this.log.logSevere("File storage failed (not a directory): " + e.getMessage()); + log.logSevere("File storage failed (not a directory): " + e.getMessage()); return false; } catch (IOException e) { - this.log.logSevere("File storage failed (IO error): " + e.getMessage()); + log.logSevere("File storage failed (IO error): " + e.getMessage()); return false; } writeFileAnnouncement(file); return true; } - private long lastcleanup = System.currentTimeMillis(); - public void writeFileAnnouncement(File file) { - synchronized (this.cacheAge) { + private static long lastcleanup = System.currentTimeMillis(); + public static void writeFileAnnouncement(File file) { + synchronized (cacheAge) { if (file.exists()) { - this.curCacheSize += file.length(); + curCacheSize += file.length(); if (System.currentTimeMillis() - lastcleanup > 300000) { // call the cleanup job only every 5 minutes cleanup(); lastcleanup = System.currentTimeMillis(); } - this.cacheAge.put(ageString(file.lastModified(), file), file); + cacheAge.put(ageString(file.lastModified(), file), file); } } } - public boolean deleteFile(URL url) { + public static boolean deleteFile(URL url) { return deleteURLfromCache("", url, "FROM"); } - private boolean deleteURLfromCache(String key, URL url, String msg) { + private static boolean deleteURLfromCache(String key, URL url, String msg) { if (deleteFileandDirs(key, getCachePath(url), msg)) { try { // As the file is gone, the entry in responseHeader.db is not needed anymore - this.log.logFinest("Trying to remove responseHeader from URL: " + url.toNormalform(false, true)); - this.responseHeaderDB.remove(plasmaURL.urlHash(url)); + log.logFinest("Trying to remove responseHeader from URL: " + url.toNormalform(false, true)); + responseHeaderDB.remove(plasmaURL.urlHash(url)); } catch (IOException e) { resetResponseHeaderDB(); - this.log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); + log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); } return true; } return false; } - private boolean deleteFile(File obj) { + private static boolean deleteFile(File obj) { if (obj.exists() && !filesInUse.contains(obj)) { long size = obj.length(); if (obj.delete()) { - this.curCacheSize -= size; + curCacheSize -= size; return true; } } return false; } - private boolean deleteFileandDirs(String key, File obj, String msg) { + private static boolean deleteFileandDirs(String key, File obj, String msg) { if (deleteFile(obj)) { - this.log.logInfo("DELETED " + msg + " CACHE [" + key + "]: " + obj.toString()); + log.logInfo("DELETED " + msg + " CACHE [" + key + "]: " + obj.toString()); obj = obj.getParentFile(); // If the has been emptied, remove it // Loop as long as we produce empty driectoriers, but stop at HTCACHE - while ((!(obj.equals(this.cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) { - if (obj.delete()) this.log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString()); + while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) { + if (obj.delete()) log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString()); obj = obj.getParentFile(); } return true; @@ -340,12 +333,12 @@ public final class plasmaHTCache { return false; } - private void cleanupDoIt(long newCacheSize) { + private static void cleanupDoIt(long newCacheSize) { File file; synchronized (cacheAge) { - Iterator iter = this.cacheAge.entrySet().iterator(); + Iterator iter = cacheAge.entrySet().iterator(); Map.Entry entry; - while (iter.hasNext() && this.curCacheSize >= newCacheSize) { + while (iter.hasNext() && curCacheSize >= newCacheSize) { if (Thread.currentThread().isInterrupted()) return; entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); @@ -354,23 +347,23 @@ public final class plasmaHTCache { if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted if (file != null) { if (filesInUse.contains(file)) continue; - this.log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString()); + log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString()); if (deleteFileandDirs(key, file, "OLD")) { try { // As the file is gone, the entry in responseHeader.db is not needed anymore String urlHash = getHash(file); if (urlHash != null) { - this.log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); - this.responseHeaderDB.remove(urlHash); + log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash); + responseHeaderDB.remove(urlHash); } else { URL url = getURL(file); if (url != null) { - this.log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); - this.responseHeaderDB.remove(plasmaURL.urlHash(url)); + log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true)); + responseHeaderDB.remove(plasmaURL.urlHash(url)); } } } catch (IOException e) { - this.log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); + log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e); } } } @@ -379,26 +372,26 @@ public final class plasmaHTCache { } } - private void cleanup() { + private static void cleanup() { // clean up cache to have 4% (enough) space for next entries - if (this.cacheAge.size() > 0 && - this.curCacheSize >= this.maxCacheSize && - this.maxCacheSize > 0) { - cleanupDoIt(this.maxCacheSize - (this.maxCacheSize / 100) * 4); + if (cacheAge.size() > 0 && + curCacheSize >= maxCacheSize && + maxCacheSize > 0) { + cleanupDoIt(maxCacheSize - (maxCacheSize / 100) * 4); } } - public void close() { + public static void close() { // closing cache scan if still running - if ((this.cacheScanThread != null) && (this.cacheScanThread.isAlive())) { - this.cacheScanThread.terminate(true); + if ((cacheScanThread != null) && (cacheScanThread.isAlive())) { + cacheScanThread.terminate(true); } // closing DB - this.responseHeaderDB.close(); + responseHeaderDB.close(); } - private String ageString(long date, File f) { + private static String ageString(long date, File f) { StringBuffer sb = new StringBuffer(32); String s = Long.toHexString(date); for (int i = s.length(); i < 16; i++) sb.append('0'); @@ -409,12 +402,12 @@ public final class plasmaHTCache { return sb.toString(); } - public void cacheScan() { + public static void cacheScan() { log.logConfig("STARTING HTCACHE SCANNING"); kelondroMScoreCluster doms = new kelondroMScoreCluster(); int fileCount = 0; - enumerateFiles fileEnum = new enumerateFiles(this.cachePath, true, false, true, true); - File dbfile = new File(this.cachePath, "responseHeader.db"); + enumerateFiles fileEnum = new enumerateFiles(cachePath, true, false, true, true); + File dbfile = new File(cachePath, "responseHeader.db"); while (fileEnum.hasMoreElements()) { if (Thread.currentThread().isInterrupted()) return; fileCount++; @@ -422,8 +415,8 @@ public final class plasmaHTCache { long nextFileModDate = nextFile.lastModified(); //System.out.println("Cache: " + dom(f)); doms.incScore(dom(nextFile)); - this.curCacheSize += nextFile.length(); - if (!dbfile.equals(nextFile)) this.cacheAge.put(ageString(nextFileModDate, nextFile), nextFile); + curCacheSize += nextFile.length(); + if (!dbfile.equals(nextFile)) cacheAge.put(ageString(nextFileModDate, nextFile), nextFile); try { Thread.sleep(10); } catch (InterruptedException e) { @@ -432,8 +425,8 @@ public final class plasmaHTCache { } //System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey())); long ageHours = 0; - if (!this.cacheAge.isEmpty()) { - Iterator i = this.cacheAge.keySet().iterator(); + if (!cacheAge.isEmpty()) { + Iterator i = cacheAge.keySet().iterator(); if (i.hasNext()) try { ageHours = (System.currentTimeMillis() - Long.parseLong(((String) i.next()).substring(0, 16), 16)) / 3600000; } catch (NumberFormatException e) { @@ -442,8 +435,8 @@ public final class plasmaHTCache { ageHours = 0; } } - this.log.logConfig("CACHE SCANNED, CONTAINS " + fileCount + - " FILES = " + this.curCacheSize/1048576 + "MB, OLDEST IS " + + log.logConfig("CACHE SCANNED, CONTAINS " + fileCount + + " FILES = " + curCacheSize/1048576 + "MB, OLDEST IS " + ((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) + " OLD"); cleanup(); @@ -459,7 +452,7 @@ public final class plasmaHTCache { InetAddress ip = serverDomains.dnsResolve(dom); if (ip == null) continue; result += ", " + dom + "=" + ip.getHostAddress(); - this.log.logConfig("PRE-FILLED " + dom + "=" + ip.getHostAddress()); + log.logConfig("PRE-FILLED " + dom + "=" + ip.getHostAddress()); fileCount++; doms.deleteScore(dom); // wait a short while to prevent that this looks like a DoS @@ -469,12 +462,12 @@ public final class plasmaHTCache { return; } } - if (result.length() > 2) this.log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + fileCount + + if (result.length() > 2) log.logConfig("PRE-FILLED DNS CACHE, FETCHED " + fileCount + " ADDRESSES: " + result.substring(2)); } - private String dom(File f) { - String s = f.toString().substring(this.cachePath.toString().length() + 1); + private static String dom(File f) { + String s = f.toString().substring(cachePath.toString().length() + 1); int p = s.indexOf("/"); if (p < 0) p = s.indexOf("\\"); if (p < 0) return null; @@ -504,30 +497,30 @@ public final class plasmaHTCache { * @throws UnsupportedProtocolException if the protocol is not supported and therefore the * info object couldn't be created */ - public IResourceInfo loadResourceInfo(URL url) throws UnsupportedProtocolException, IllegalAccessException { + public static IResourceInfo loadResourceInfo(URL url) throws UnsupportedProtocolException, IllegalAccessException { // getting the URL hash String urlHash = plasmaURL.urlHash(url.toNormalform(true, true)); // loading data from database - Map hdb = this.responseHeaderDB.getMap(urlHash); + Map hdb = responseHeaderDB.getMap(urlHash); if (hdb == null) return null; // generate the cached object - IResourceInfo cachedObj = this.objFactory.buildResourceInfoObj(url, hdb); + IResourceInfo cachedObj = objFactory.buildResourceInfoObj(url, hdb); return cachedObj; } - public ResourceInfoFactory getResourceInfoFactory() { - return this.objFactory; + public static ResourceInfoFactory getResourceInfoFactory() { + return objFactory; } - public boolean full() { - return (this.cacheStack.size() > stackLimit); + public static boolean full() { + return (cacheStack.size() > stackLimit); } - public boolean empty() { - return (this.cacheStack.size() == 0); + public static boolean empty() { + return (cacheStack.size() == 0); } public static boolean isPicture(String mimeType) { @@ -565,7 +558,7 @@ public final class plasmaHTCache { /** * This function moves an old cached object (if it exists) to the new position */ - private void moveCachedObject(File oldpath, File newpath) { + private static void moveCachedObject(File oldpath, File newpath) { try { if (oldpath.exists() && oldpath.isFile() && (!newpath.exists())) { long d = oldpath.lastModified(); @@ -573,8 +566,8 @@ public final class plasmaHTCache { if (oldpath.renameTo(newpath)) { cacheAge.put(ageString(d, newpath), newpath); File obj = oldpath.getParentFile(); - while ((!(obj.equals(this.cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) { - if (obj.delete()) this.log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString()); + while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) { + if (obj.delete()) log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString()); obj = obj.getParentFile(); } } @@ -585,7 +578,7 @@ public final class plasmaHTCache { } } - private String replaceRegex(String input, String regex, String replacement) { + private static String replaceRegex(String input, String regex, String replacement) { if (input == null) { return ""; } if (input.length() > 0) { final Pattern searchPattern = Pattern.compile(regex); @@ -608,7 +601,7 @@ public final class plasmaHTCache { * that path will be generated * @return new File */ - public File getCachePath(final URL url) { + public static File getCachePath(final URL url) { // this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString()); // peer.yacy || www.peer.yacy = http/yacy/peer @@ -687,22 +680,22 @@ public final class plasmaHTCache { return null; } - private File treeFile(StringBuffer fileName, String prefix, String path) { + private static File treeFile(StringBuffer fileName, String prefix, String path) { StringBuffer f = new StringBuffer(fileName.length() + 30); f.append(fileName); if (prefix != null) f.append('/').append(prefix); f.append(path); - return new File(this.cachePath, f.toString()); + return new File(cachePath, f.toString()); } - private File hashFile(StringBuffer fileName, String prefix, String extention, URL url) { + private static File hashFile(StringBuffer fileName, String prefix, String extention, URL url) { String hexHash = yacySeed.b64Hash2hexHash(plasmaURL.urlHash(url)); StringBuffer f = new StringBuffer(fileName.length() + 30); f.append(fileName); if (prefix != null) f.append('/').append(prefix); f.append('/').append(hexHash.substring(0,2)).append('/').append(hexHash.substring(2,4)).append('/').append(hexHash); if (extention != null) fileName.append(extention); - return new File(this.cachePath, f.toString()); + return new File(cachePath, f.toString()); } @@ -727,7 +720,7 @@ public final class plasmaHTCache { * this is the reverse function to getCachePath: it constructs the url as string * from a given storage path */ - public URL getURL(final File f) { + public static URL getURL(final File f) { // this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]"); final String urlHash = getHash(f); if (urlHash != null) { @@ -742,7 +735,7 @@ public final class plasmaHTCache { if (url != null) return url; // try responseHeaderDB Map hdb; - hdb = this.responseHeaderDB.getMap(urlHash); + hdb = responseHeaderDB.getMap(urlHash); if (hdb != null) { Object origRequestLine = hdb.get(httpHeader.X_YACY_ORIGINAL_REQUEST_LINE); if ((origRequestLine != null)&&(origRequestLine instanceof String)) { @@ -853,19 +846,19 @@ public final class plasmaHTCache { * is available or the cached file is not readable, null * is returned. */ - public InputStream getResourceContentStream(URL url) { + public static InputStream getResourceContentStream(URL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) try { return new BufferedInputStream(new FileInputStream(f)); } catch (IOException e) { - this.log.logSevere("Unable to create a BufferedInputStream from file " + f,e); + log.logSevere("Unable to create a BufferedInputStream from file " + f,e); return null; } return null; } - public long getResourceContentLength(URL url) { + public static long getResourceContentLength(URL url) { // load the url as resource from the cache File f = getCachePath(url); if (f.exists() && f.canRead()) { @@ -890,7 +883,7 @@ public final class plasmaHTCache { (ls.indexOf("memberlist.php?sid=") >= 0)); } - public Entry newEntry( + public static Entry newEntry( Date initDate, int depth, URL url, @@ -916,7 +909,7 @@ public final class plasmaHTCache { ); } - public final class Entry { + public final static class Entry { // the class objects private Date initDate; // the date when the request happened; will be used as a key @@ -1080,7 +1073,7 @@ public final class plasmaHTCache { assert(this.nomalizedURLHash != null) : "URL Hash is null"; if (this.resInfo == null) return false; try { - plasmaHTCache.this.responseHeaderDB.set(this.nomalizedURLHash, this.resInfo.getMap()); + responseHeaderDB.set(this.nomalizedURLHash, this.resInfo.getMap()); } catch (Exception e) { resetResponseHeaderDB(); return false; diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 0f7aefd87..dcf433296 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -97,16 +97,13 @@ public class plasmaSnippetCache { * */ private static HashMap faviconCache; - private static plasmaHTCache cacheManager; private static plasmaParser parser; private static serverLog log; public static void init( - plasmaHTCache cacheManagerx, plasmaParser parserx, serverLog logx ) { - cacheManager = cacheManagerx; parser = parserx; log = logx; snippetsScoreCounter = 0; @@ -275,10 +272,10 @@ public class plasmaSnippetCache { IResourceInfo resInfo = null; try { // trying to load the resource from the cache - resContent = cacheManager.getResourceContentStream(url); + resContent = plasmaHTCache.getResourceContentStream(url); if (resContent != null) { // if the content was found - resContentLength = cacheManager.getResourceContentLength(url); + resContentLength = plasmaHTCache.getResourceContentLength(url); } else if (fetchOnline) { // if not found try to download it @@ -295,8 +292,8 @@ public class plasmaSnippetCache { resContent = new ByteArrayInputStream(resourceArray); resContentLength = resourceArray.length; } else { - resContent = cacheManager.getResourceContentStream(url); - resContentLength = cacheManager.getResourceContentLength(url); + resContent = plasmaHTCache.getResourceContentStream(url); + resContentLength = plasmaHTCache.getResourceContentLength(url); } } @@ -380,10 +377,10 @@ public class plasmaSnippetCache { IResourceInfo resInfo = null; try { // trying to load the resource from the cache - resContent = cacheManager.getResourceContentStream(url); + resContent = plasmaHTCache.getResourceContentStream(url); if (resContent != null) { // if the content was found - resContentLength = cacheManager.getResourceContentLength(url); + resContentLength = plasmaHTCache.getResourceContentLength(url); } else if (fetchOnline) { // if not found try to download it @@ -400,8 +397,8 @@ public class plasmaSnippetCache { resContent = new ByteArrayInputStream(resourceArray); resContentLength = resourceArray.length; } else { - resContent = cacheManager.getResourceContentStream(url); - resContentLength = cacheManager.getResourceContentLength(url); + resContent = plasmaHTCache.getResourceContentStream(url); + resContentLength = plasmaHTCache.getResourceContentLength(url); } } @@ -749,7 +746,7 @@ public class plasmaSnippetCache { if (docInfo == null) { // try to get the header from the htcache directory try { - docInfo = cacheManager.loadResourceInfo(url); + docInfo = plasmaHTCache.loadResourceInfo(url); } catch (Exception e) { // ignore this. resource info loading failed } @@ -763,7 +760,7 @@ public class plasmaSnippetCache { // getting URL mimeType try { httpHeader header = httpc.whead(url, url.getHost(), 10000, null, null, plasmaSwitchboard.getSwitchboard().remoteProxyConfig); - docInfo = cacheManager.getResourceInfoFactory().buildResourceInfoObj(url, header); + docInfo = plasmaHTCache.getResourceInfoFactory().buildResourceInfoObj(url, header); } catch (Exception e) { // ingore this. http header download failed } @@ -771,7 +768,7 @@ public class plasmaSnippetCache { // STEP 3: if the metadata is still null try to guess the mimeType of the resource if (docInfo == null) { - String filename = cacheManager.getCachePath(url).getName(); + String filename = plasmaHTCache.getCachePath(url).getName(); int p = filename.lastIndexOf('.'); if ( // if no extension is available (p < 0) || @@ -820,9 +817,9 @@ public class plasmaSnippetCache { long contentLength = -1; // trying to load the resource body from cache - InputStream resource = cacheManager.getResourceContentStream(url); + InputStream resource = plasmaHTCache.getResourceContentStream(url); if (resource != null) { - contentLength = cacheManager.getResourceContentLength(url); + contentLength = plasmaHTCache.getResourceContentLength(url); } else if (fetchOnline) { // if the content is not available in cache try to download it from web @@ -834,8 +831,8 @@ public class plasmaSnippetCache { // in case that the reosurce was not in ram, read it from disk if (resourceArray == null) { - resource = cacheManager.getResourceContentStream(url); - contentLength = cacheManager.getResourceContentLength(url); + resource = plasmaHTCache.getResourceContentStream(url); + contentLength = plasmaHTCache.getResourceContentLength(url); } else { resource = new ByteArrayInputStream(resourceArray); contentLength = resourceArray.length; diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c673f7cce..c6279151e 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -217,7 +217,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public plasmaCrawlNURL noticeURL; public plasmaCrawlZURL errorURL, delegatedURL; public plasmaWordIndex wordIndex; - public plasmaHTCache cacheManager; public plasmaCrawlLoader cacheLoader; public plasmaSwitchboardQueue sbQueue; public plasmaCrawlStacker sbStackCrawlThread; @@ -1100,7 +1099,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig(PROXY_CACHE_SIZE, "2")); // this is megabyte String cacheLayout = getConfig(PROXY_CACHE_LAYOUT, PROXY_CACHE_LAYOUT_TREE); boolean cacheMigration = getConfigBool(PROXY_CACHE_MIGRATION, true); - this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP_time, cacheLayout, cacheMigration); + plasmaHTCache.init(htCachePath, maxCacheSize, ramHTTP_time, cacheLayout, cacheMigration); // create the release download directory String release = getConfig(RELEASE_PATH, RELEASE_PATH_DEFAULT); @@ -1164,7 +1163,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * initialize switchboard queue * ====================================================================== */ // create queue - this.sbQueue = new plasmaSwitchboardQueue(this.cacheManager, this.wordIndex.loadedURL, new File(this.plasmaPath, "switchboardQueue1.stack"), this.profiles); + this.sbQueue = new plasmaSwitchboardQueue(this.wordIndex.loadedURL, new File(this.plasmaPath, "switchboardQueue1.stack"), this.profiles); // setting the indexing queue slots indexingSlots = (int) getConfigLong(INDEXER_SLOTS, 30); @@ -1202,7 +1201,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser log.logConfig("Starting Crawl Loader"); crawlSlots = Integer.parseInt(getConfig(CRAWLER_THREADS_ACTIVE_MAX, "10")); plasmaCrawlLoader.switchboard = this; - this.cacheLoader = new plasmaCrawlLoader(this.cacheManager, this.log); + this.cacheLoader = new plasmaCrawlLoader(this.log); /* * Creating sync objects and loading status for the crawl jobs @@ -1291,7 +1290,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // generate snippets cache log.logConfig("Initializing Snippet Cache"); - plasmaSnippetCache.init(cacheManager, parser,log); + plasmaSnippetCache.init(parser, log); // start yacy core log.logConfig("Starting YaCy Protocol Core"); @@ -1530,7 +1529,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser * @param newCacheSize in MB */ public final void setCacheSize(long newCacheSize) { - this.cacheManager.setCacheSize(1048576 * newCacheSize); + plasmaHTCache.setCacheSize(1048576 * newCacheSize); } public boolean onlineCaution() { @@ -1651,15 +1650,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return hasDoneSomething; } - public plasmaHTCache getCacheManager() { - return cacheManager; - } - synchronized public void htEntryStoreEnqueued(plasmaHTCache.Entry entry) { - if (cacheManager.full()) + if (plasmaHTCache.full()) htEntryStoreProcess(entry); else - cacheManager.push(entry); + plasmaHTCache.push(entry); } synchronized public boolean htEntryStoreProcess(plasmaHTCache.Entry entry) { @@ -1724,7 +1719,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } else { String error = entry.shallStoreCacheForProxy(); if (error == null) { - this.cacheManager.writeResourceContent(entry.url(), entry.cacheArray()); + plasmaHTCache.writeResourceContent(entry.url(), entry.cacheArray()); this.log.logFine("WROTE FILE (" + entry.cacheArray().length + " bytes) for " + entry.cacheFile()); } else { this.log.logFine("WRITE OF FILE " + entry.cacheFile() + " FORBIDDEN: " + error); @@ -1755,7 +1750,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser )); } else { if (!entry.profile().storeHTCache() && entry.cacheFile().exists()) { - this.cacheManager.deleteFile(entry.url()); + plasmaHTCache.deleteFile(entry.url()); } } @@ -1763,12 +1758,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } public boolean htEntryStoreJob() { - if (cacheManager.empty()) return false; - return htEntryStoreProcess(cacheManager.pop()); + if (plasmaHTCache.empty()) return false; + return htEntryStoreProcess(plasmaHTCache.pop()); } public int htEntrySize() { - return cacheManager.size(); + return plasmaHTCache.size(); } public void close() { @@ -1790,7 +1785,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser profiles.close(); robots.close(); parser.close(); - cacheManager.close(); + plasmaHTCache.close(); sbQueue.close(); webStructure.flushCitationReference("crg"); webStructure.close(); @@ -2754,7 +2749,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // explicit delete/free resources if ((entry != null) && (entry.profile() != null) && (!(entry.profile().storeHTCache()))) { plasmaHTCache.filesInUse.remove(entry.cacheFile()); - cacheManager.deleteFile(entry.url()); + plasmaHTCache.deleteFile(entry.url()); } entry = null; diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 1727294c7..01f4c6542 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -64,14 +64,12 @@ public class plasmaSwitchboardQueue { private kelondroStack sbQueueStack; private plasmaCrawlProfile profiles; - plasmaHTCache htCache; private plasmaCrawlLURL lurls; private File sbQueueStackPath; - public plasmaSwitchboardQueue(plasmaHTCache htCache, plasmaCrawlLURL lurls, File sbQueueStackPath, plasmaCrawlProfile profiles) { + public plasmaSwitchboardQueue(plasmaCrawlLURL lurls, File sbQueueStackPath, plasmaCrawlProfile profiles) { this.sbQueueStackPath = sbQueueStackPath; this.profiles = profiles; - this.htCache = htCache; this.lurls = lurls; initQueueStack(); @@ -277,7 +275,7 @@ public class plasmaSwitchboardQueue { } public File cacheFile() { - return htCache.getCachePath(url); + return plasmaHTCache.getCachePath(url); } public boolean proxy() { @@ -303,7 +301,7 @@ public class plasmaSwitchboardQueue { private IResourceInfo getCachedObjectInfo() { if (this.contentInfo == null) try { - this.contentInfo = plasmaSwitchboardQueue.this.htCache.loadResourceInfo(this.url); + this.contentInfo = plasmaHTCache.loadResourceInfo(this.url); } catch (Exception e) { serverLog.logSevere("PLASMA", "responseHeader: failed to get header", e); return null; diff --git a/source/de/anomic/server/serverInstantThread.java b/source/de/anomic/server/serverInstantThread.java index 1018384f3..c5a8c5643 100644 --- a/source/de/anomic/server/serverInstantThread.java +++ b/source/de/anomic/server/serverInstantThread.java @@ -59,8 +59,9 @@ public final class serverInstantThread extends serverAbstractThread implements s // jobExec is the name of a method of the object 'env' that executes the one-step-run // jobCount is the name of a method that returns the size of the job // freemem is the name of a method that tries to free memory and returns void + Class theClass = (env instanceof Class) ? (Class) env : env.getClass(); try { - this.jobExecMethod = env.getClass().getMethod(jobExec, new Class[0]); + this.jobExecMethod = theClass.getMethod(jobExec, new Class[0]); } catch (NoSuchMethodException e) { throw new RuntimeException("serverInstantThread, wrong declaration of jobExec: " + e.getMessage()); } @@ -68,7 +69,7 @@ public final class serverInstantThread extends serverAbstractThread implements s if (jobCount == null) this.jobCountMethod = null; else - this.jobCountMethod = env.getClass().getMethod(jobCount, new Class[0]); + this.jobCountMethod = theClass.getMethod(jobCount, new Class[0]); } catch (NoSuchMethodException e) { throw new RuntimeException("serverInstantThread, wrong declaration of jobCount: " + e.getMessage()); @@ -77,13 +78,13 @@ public final class serverInstantThread extends serverAbstractThread implements s if (freemem == null) this.freememExecMethod = null; else - this.freememExecMethod = env.getClass().getMethod(freemem, new Class[0]); + this.freememExecMethod = theClass.getMethod(freemem, new Class[0]); } catch (NoSuchMethodException e) { throw new RuntimeException("serverInstantThread, wrong declaration of freemem: " + e.getMessage()); } - this.environment = env; - this.setName(env.getClass().getName() + "." + jobExec); + this.environment = (env instanceof Class) ? null : env; + this.setName(theClass.getName() + "." + jobExec); this.handle = new Long(System.currentTimeMillis() + this.getName().hashCode()); } diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 30d81b0ef..5ee29bb5e 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.net.URL; +import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDomains; @@ -732,7 +733,7 @@ public final class yacySeedDB { File seedFile = null; try { // create a seed file which for uploading ... - seedFile = File.createTempFile("seedFile",".txt",((plasmaSwitchboard)sb).cacheManager.cachePath); + seedFile = File.createTempFile("seedFile",".txt", plasmaHTCache.cachePath); seedFile.deleteOnExit(); serverLog.logFine("YACY","SaveSeedList: Storing seedlist into tempfile " + seedFile.toString()); ArrayList uv = storeCache(seedFile, true);