From 7989335ed653ca6befeabf50d4a990078d952f1c Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 19 Aug 2008 14:10:40 +0000
Subject: [PATCH] Preparations to replace the HTCache with a new storage data
 structure: - refactoring of the HTCache (separation of cache entry) - added
 new storage class for BLOBs. (not used yet, this is half-way to a new
 structure)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5062 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 build.properties                              |   2 +-
 htroot/ViewFile.java                          |   3 +-
 source/de/anomic/crawler/CrawlQueues.java     |   4 +-
 source/de/anomic/crawler/FTPLoader.java       |  13 +-
 source/de/anomic/crawler/HTTPLoader.java      |  10 +-
 source/de/anomic/crawler/LoaderMessage.java   |  10 +-
 source/de/anomic/crawler/ProtocolLoader.java  |   6 +-
 .../de/anomic/http/httpdProxyCacheEntry.java  | 335 +++++++++++++++++
 source/de/anomic/http/httpdProxyHandler.java  |   7 +-
 source/de/anomic/icap/icapd.java              |   4 +-
 .../de/anomic/kelondro/kelondroBLOBArray.java | 242 +++++++++++++
 .../kelondro/kelondroMergeIterator.java       |   4 +-
 .../anomic/kelondro/kelondroSplitTable.java   |  13 +-
 source/de/anomic/plasma/plasmaHTCache.java    | 340 +-----------------
 .../de/anomic/plasma/plasmaSnippetCache.java  |   7 +-
 .../de/anomic/plasma/plasmaSwitchboard.java   |   3 +-
 source/de/anomic/plasma/plasmaWordIndex.java  |   5 +-
 source/de/anomic/ymage/ymageOSM.java          |   3 +-
 18 files changed, 644 insertions(+), 367 deletions(-)
 create mode 100755 source/de/anomic/http/httpdProxyCacheEntry.java
 create mode 100755 source/de/anomic/kelondro/kelondroBLOBArray.java

diff --git a/build.properties b/build.properties
index 135ba9bdf..19c3c4fbc 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.5
 javacTarget=1.5
 
 # Release Configuration
-releaseVersion=0.593
+releaseVersion=0.594
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 90f9305b4..bd3d19f3b 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -37,6 +37,7 @@ import de.anomic.data.htmlTools;
 import de.anomic.htmlFilter.htmlFilterImageEntry;
 import de.anomic.http.HttpClient;
 import de.anomic.http.httpHeader;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.index.indexURLReference;
 import de.anomic.plasma.plasmaCondenser;
 import de.anomic.plasma.plasmaHTCache;
@@ -145,7 +146,7 @@ public class ViewFile {
 
         // if the resource body was not cached we try to load it from web
         if (resource == null) {
-            plasmaHTCache.Entry entry = null;
+            httpdProxyCacheEntry entry = null;
             try {
                 entry = sb.crawlQueues.loadResourceFromWeb(url, 5000, false, true, false);
             } catch (final Exception e) {
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 0bdae76cc..e574851bb 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -36,8 +36,8 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.kelondro.kelondroFlexWidthArray;
-import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaSwitchboardConstants;
@@ -465,7 +465,7 @@ public class CrawlQueues {
         return;
     }
     
-    public plasmaHTCache.Entry loadResourceFromWeb(
+    public httpdProxyCacheEntry loadResourceFromWeb(
             final yacyURL url, 
             final int socketTimeout,
             final boolean keepInMemory,
diff --git a/source/de/anomic/crawler/FTPLoader.java b/source/de/anomic/crawler/FTPLoader.java
index 0953d6cee..dab1224e1 100644
--- a/source/de/anomic/crawler/FTPLoader.java
+++ b/source/de/anomic/crawler/FTPLoader.java
@@ -34,6 +34,7 @@ import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.util.Date;
 
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.net.ftpc;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
@@ -54,9 +55,9 @@ public class FTPLoader {
         maxFileSize = (int) sb.getConfigLong("crawler.ftp.maxFileSize", -1l);
     }
 
-    protected plasmaHTCache.Entry createCacheEntry(final CrawlEntry entry, final String mimeType,
+    protected httpdProxyCacheEntry createCacheEntry(final CrawlEntry entry, final String mimeType,
             final Date fileDate) {
-        return plasmaHTCache.newEntry(new Date(), entry.depth(), entry.url(), entry.name(), "OK", new ResourceInfo(
+        return plasmaHTCache.newEntry(entry.depth(), entry.url(), entry.name(), "OK", new ResourceInfo(
                 entry.url(), sb.getURL(entry.referrerhash()), mimeType, fileDate), entry.initiator(),
                 sb.webIndex.profilesActiveCrawls.getEntry(entry.profileHandle()));
     }
@@ -67,13 +68,13 @@ public class FTPLoader {
      * @param entry
      * @return
      */
-    public plasmaHTCache.Entry load(final CrawlEntry entry) {
+    public httpdProxyCacheEntry load(final CrawlEntry entry) {
         final yacyURL entryUrl = entry.url();
         final String fullPath = getPath(entryUrl);
         final File cacheFile = createCachefile(entryUrl);
 
         // the return value
-        plasmaHTCache.Entry htCache = null;
+        httpdProxyCacheEntry htCache = null;
 
         // determine filename and path
         String file, path;
@@ -232,7 +233,7 @@ public class FTPLoader {
      * @return
      * @throws Exception
      */
-    private plasmaHTCache.Entry getFile(final ftpc ftpClient, final CrawlEntry entry, final File cacheFile)
+    private httpdProxyCacheEntry getFile(final ftpc ftpClient, final CrawlEntry entry, final File cacheFile)
             throws Exception {
         // determine the mimetype of the resource
         final yacyURL entryUrl = entry.url();
@@ -242,7 +243,7 @@ public class FTPLoader {
 
         // if the mimetype and file extension is supported we start to download
         // the file
-        plasmaHTCache.Entry htCache = null;
+        httpdProxyCacheEntry htCache = null;
         if (plasmaParser.supportedContent(plasmaParser.PARSER_MODE_CRAWLER, entryUrl, mimeType)) {
             // aborting download if content is too long
             final int size = ftpClient.fileSize(path);
diff --git a/source/de/anomic/crawler/HTTPLoader.java b/source/de/anomic/crawler/HTTPLoader.java
index c7dd43592..0e9e054bd 100644
--- a/source/de/anomic/crawler/HTTPLoader.java
+++ b/source/de/anomic/crawler/HTTPLoader.java
@@ -42,6 +42,7 @@ import de.anomic.http.httpHeader;
 import de.anomic.http.httpdBoundedSizeOutputStream;
 import de.anomic.http.httpdByteCountOutputStream;
 import de.anomic.http.httpdLimitExceededException;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.index.indexReferenceBlacklist;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
@@ -95,10 +96,9 @@ public final class HTTPLoader {
      * @param responseStatus Status-Code SPACE Reason-Phrase
      * @return
      */
-    protected plasmaHTCache.Entry createCacheEntry(final CrawlEntry entry, final Date requestDate, final httpHeader requestHeader, final httpHeader responseHeader, final String responseStatus) {
+    protected httpdProxyCacheEntry createCacheEntry(final CrawlEntry entry, final Date requestDate, final httpHeader requestHeader, final httpHeader responseHeader, final String responseStatus) {
         final IResourceInfo resourceInfo = new ResourceInfo(entry.url(), requestHeader, responseHeader);
         return plasmaHTCache.newEntry(
-                requestDate, 
                 entry.depth(),
                 entry.url(),
                 entry.name(),
@@ -109,11 +109,11 @@ public final class HTTPLoader {
         );
     }    
    
-    public plasmaHTCache.Entry load(final CrawlEntry entry, final String parserMode) {
+    public httpdProxyCacheEntry load(final CrawlEntry entry, final String parserMode) {
         return load(entry, parserMode, DEFAULT_CRAWLING_RETRY_COUNT);
     }
     
-    private plasmaHTCache.Entry load(final CrawlEntry entry, final String parserMode, final int retryCount) {
+    private httpdProxyCacheEntry load(final CrawlEntry entry, final String parserMode, final int retryCount) {
 
         if (retryCount < 0) {
             this.log.logInfo("Redirection counter exceeded for URL " + entry.url().toString() + ". Processing aborted.");
@@ -137,7 +137,7 @@ public final class HTTPLoader {
         }
         
         // take a file from the net
-        plasmaHTCache.Entry htCache = null;
+        httpdProxyCacheEntry htCache = null;
         final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE);
         try {
             // create a request header
diff --git a/source/de/anomic/crawler/LoaderMessage.java b/source/de/anomic/crawler/LoaderMessage.java
index 54ed0a4b2..c9c5e287a 100644
--- a/source/de/anomic/crawler/LoaderMessage.java
+++ b/source/de/anomic/crawler/LoaderMessage.java
@@ -23,7 +23,7 @@
 
 package de.anomic.crawler;
 
-import de.anomic.plasma.plasmaHTCache;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.server.serverSemaphore;
 import de.anomic.yacy.yacyURL;
 
@@ -41,7 +41,7 @@ public final class LoaderMessage {
     public final boolean keepInMemory;
     
     private serverSemaphore resultSync  = null;
-    private plasmaHTCache.Entry result;
+    private httpdProxyCacheEntry result;
     private String errorMessage;
     
     // loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) {
@@ -80,7 +80,7 @@ public final class LoaderMessage {
         return this.errorMessage;
     }
     
-    public void setResult(final plasmaHTCache.Entry theResult) {
+    public void setResult(final httpdProxyCacheEntry theResult) {
         // store the result
         this.result = theResult;
         
@@ -88,8 +88,8 @@ public final class LoaderMessage {
         this.resultSync.V();        
     }
     
-    public plasmaHTCache.Entry waitForResult() throws InterruptedException {
-        plasmaHTCache.Entry theResult = null;
+    public httpdProxyCacheEntry waitForResult() throws InterruptedException {
+        httpdProxyCacheEntry theResult = null;
         
         this.resultSync.P();
         /* =====> CRITICAL SECTION <======== */
diff --git a/source/de/anomic/crawler/ProtocolLoader.java b/source/de/anomic/crawler/ProtocolLoader.java
index e6653c03f..0044a12ad 100644
--- a/source/de/anomic/crawler/ProtocolLoader.java
+++ b/source/de/anomic/crawler/ProtocolLoader.java
@@ -32,7 +32,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
-import de.anomic.plasma.plasmaHTCache;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.serverCore;
 import de.anomic.server.logging.serverLog;
@@ -68,7 +68,7 @@ public final class ProtocolLoader {
         return (HashSet<String>) this.supportedProtocols.clone();
     }
     
-    public plasmaHTCache.Entry load(final CrawlEntry entry, final String parserMode) {
+    public httpdProxyCacheEntry load(final CrawlEntry entry, final String parserMode) {
         // getting the protocol of the next URL                
         final String protocol = entry.url().getProtocol();
         final String host = entry.url().getHost();
@@ -109,7 +109,7 @@ public final class ProtocolLoader {
     public String process(final CrawlEntry entry, final String parserMode) {
         // load a resource, store it to htcache and push queue entry to switchboard queue
         // returns null if everything went fine, a fail reason string if a problem occurred
-        plasmaHTCache.Entry h;
+        httpdProxyCacheEntry h;
         try {
             h = load(entry, parserMode);
             entry.setStatus("loaded");
diff --git a/source/de/anomic/http/httpdProxyCacheEntry.java b/source/de/anomic/http/httpdProxyCacheEntry.java
new file mode 100755
index 000000000..ddbcd1dea
--- /dev/null
+++ b/source/de/anomic/http/httpdProxyCacheEntry.java
@@ -0,0 +1,335 @@
+// httpdProxyCacheEntry.java
+// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 19.08.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+package de.anomic.http;
+
+import java.io.File;
+import java.util.Date;
+
+import de.anomic.crawler.CrawlProfile;
+import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.server.serverSystem;
+import de.anomic.yacy.yacyURL;
+
+public class httpdProxyCacheEntry {
+    
+    // doctypes:
+    public static final char DT_PDFPS   = 'p';
+    public static final char DT_TEXT    = 't';
+    public static final char DT_HTML    = 'h';
+    public static final char DT_DOC     = 'd';
+    public static final char DT_IMAGE   = 'i';
+    public static final char DT_MOVIE   = 'm';
+    public static final char DT_FLASH   = 'f';
+    public static final char DT_SHARE   = 's';
+    public static final char DT_AUDIO   = 'a';
+    public static final char DT_BINARY  = 'b';
+    public static final char DT_UNKNOWN = 'u';
+
+    // the class objects
+    private final  int                depth;           // the depth of pre-fetching
+    private final  String             responseStatus;
+    private final  File               cacheFile;       // the cache file
+    private        byte[]             cacheArray;      // or the cache as byte-array
+    private final  yacyURL            url;
+    private final  String             name;            // the name of the link, read as anchor from an <a>-tag
+    private final  Date               lastModified;
+    private        char               doctype;
+    private final  String             language;
+    private final  CrawlProfile.entry profile;
+    private final  String             initiator;
+
+    /**
+     * protocol specific information about the resource
+     */
+    private final IResourceInfo resInfo;
+
+    // doctype calculation
+    public static char docType(final yacyURL url) {
+        // Derives the doctype from the file extension of the URL path; DT_UNKNOWN if no extension matches.
+        // A fixed locale is used because the default-locale toLowerCase() breaks ".GIF" etc. under tr/az locales.
+        final String path = url.getPath().toLowerCase(java.util.Locale.ENGLISH);
+        char doctype = DT_UNKNOWN;
+        if (path.endsWith(".gif"))       { doctype = DT_IMAGE; }
+        else if (path.endsWith(".ico"))  { doctype = DT_IMAGE; }
+        else if (path.endsWith(".bmp"))  { doctype = DT_IMAGE; }
+        else if (path.endsWith(".jpg") || path.endsWith(".jpeg")) { doctype = DT_IMAGE; }
+        else if (path.endsWith(".png"))  { doctype = DT_IMAGE; }
+        else if (path.endsWith(".html")) { doctype = DT_HTML;  }
+        else if (path.endsWith(".txt"))  { doctype = DT_TEXT;  }
+        else if (path.endsWith(".doc"))  { doctype = DT_DOC;   }
+        else if (path.endsWith(".rtf"))  { doctype = DT_DOC;   }
+        else if (path.endsWith(".pdf"))  { doctype = DT_PDFPS; }
+        else if (path.endsWith(".ps"))   { doctype = DT_PDFPS; }
+        else if (path.endsWith(".avi"))  { doctype = DT_MOVIE; }
+        else if (path.endsWith(".mov"))  { doctype = DT_MOVIE; }
+        else if (path.endsWith(".qt"))   { doctype = DT_MOVIE; }
+        else if (path.endsWith(".mpg"))  { doctype = DT_MOVIE; }
+        else if (path.endsWith(".md5"))  { doctype = DT_SHARE; }
+        else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; }
+        else if (path.endsWith(".asf"))  { doctype = DT_FLASH; }
+        return doctype;
+    }
+
+    public static char docType(final String mime) {
+        // Maps a mime type to a DT_* doctype code; a null or unrecognized mime type yields DT_UNKNOWN.
+        // image/ is matched first; exact subtype suffixes are tested before the text/, audio/ and video/ prefixes.
+        char doctype = DT_UNKNOWN;
+        if (mime == null) doctype = DT_UNKNOWN;
+        else if (mime.startsWith("image/")) doctype = DT_IMAGE;
+        else if (mime.endsWith("/gif")) doctype = DT_IMAGE;
+        else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE;
+        else if (mime.endsWith("/png")) doctype = DT_IMAGE;
+        else if (mime.endsWith("/html")) doctype = DT_HTML;
+        else if (mime.endsWith("/rtf")) doctype = DT_DOC;
+        else if (mime.endsWith("/pdf")) doctype = DT_PDFPS;
+        else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY;
+        else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH;
+        else if (mime.endsWith("/msword")) doctype = DT_DOC;
+        else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC;
+        else if (mime.endsWith("/postscript")) doctype = DT_PDFPS;
+        else if (mime.startsWith("text/")) doctype = DT_TEXT;
+        else if (mime.startsWith("audio/")) doctype = DT_AUDIO;
+        else if (mime.startsWith("video/")) doctype = DT_MOVIE;
+        //bz2     = application/x-bzip2
+        //dvi     = application/x-dvi
+        //gz      = application/gzip
+        //hqx     = application/mac-binhex40
+        //lha     = application/x-lzh
+        //lzh     = application/x-lzh
+        //pac     = application/x-ns-proxy-autoconfig
+        //php     = application/x-httpd-php
+        //phtml   = application/x-httpd-php
+        //rss     = application/xml
+        //tar     = application/tar
+        //tex     = application/x-tex
+        //tgz     = application/tar
+        //torrent = application/x-bittorrent
+        //xhtml   = application/xhtml+xml
+        //xla     = application/msexcel
+        //xls     = application/msexcel
+        //xsl     = application/xml
+        //xml     = application/xml
+        //Z       = application/x-compress
+        //zip     = application/zip
+        return doctype;
+    }
+    
+    public httpdProxyCacheEntry(final int depth,
+            final yacyURL url, final String name, final String responseStatus,
+            final IResourceInfo resourceInfo, final String initiator,
+            final CrawlProfile.entry profile) {
+        if (resourceInfo == null) {
+            // reject the bad argument for this call only; System.exit(0) here would shut down the whole peer
+            throw new IllegalArgumentException("Content information object is null. " + url);
+        }
+        this.resInfo = resourceInfo;
+        this.url = url;
+        this.name = name;
+        this.cacheFile = plasmaHTCache.getCachePath(this.url);
+
+        // assigned; an empty initiator is normalized to null, which proxy() reports as a proxy-loaded entry
+        this.depth = depth;
+        this.responseStatus = responseStatus;
+        this.profile = profile;
+        this.initiator = ((initiator == null) || (initiator.length() == 0)) ? null : initiator;
+
+        // getting the last modified date
+        this.lastModified = resourceInfo.getModificationDate();
+
+        // getting the doctype: the mime type decides first, the url file extension is the fallback
+        this.doctype = docType(resourceInfo.getMimeType());
+        if (this.doctype == DT_UNKNOWN)
+            this.doctype = docType(url);
+        this.language = yacyURL.language(url);
+
+        // to be defined later:
+        this.cacheArray = null;
+    }
+
+    public String name() {
+        // the anchor name; can be either the text inside the anchor tag or the
+        // page description after loading of the page
+        return this.name;
+    }
+
+    public yacyURL url() {
+        return this.url;
+    }
+
+    public String urlHash() {
+        return this.url.hash();
+    }
+
+    public Date lastModified() {
+        return this.lastModified;
+    }
+
+    public String language() {
+        return this.language;
+    }
+
+    public CrawlProfile.entry profile() {
+        return this.profile;
+    }
+
+    public String initiator() {
+        return this.initiator;
+    }
+
+    public boolean proxy() {
+        return initiator() == null;
+    }
+
+    public long size() {
+        // length of the in-memory content; 0 as long as no content array has been set
+        final byte[] content = this.cacheArray;
+        return (content == null) ? 0 : content.length;
+    }
+
+    public int depth() {
+        return this.depth;
+    }
+
+    public yacyURL referrerURL() {
+        return (this.resInfo == null) ? null : this.resInfo.getRefererUrl();
+    }
+
+    public File cacheFile() {
+        return this.cacheFile;
+    }
+
+    public void setCacheArray(final byte[] data) {
+        this.cacheArray = data;
+    }
+
+    public byte[] cacheArray() {
+        return this.cacheArray;
+    }
+
+    public IResourceInfo getDocumentInfo() {
+        return this.resInfo;
+    }
+
+    public String getMimeType() {
+        return (this.resInfo == null) ? null : this.resInfo.getMimeType();
+    }
+
+    public Date ifModifiedSince() {
+        return (this.resInfo == null) ? null : this.resInfo.ifModifiedSince();
+    }
+
+    public boolean requestWithCookie() {
+        return (this.resInfo == null) ? false : this.resInfo.requestWithCookie();
+    }
+
+    public boolean requestProhibitsIndexing() {
+        return (this.resInfo == null) ? false : this.resInfo.requestProhibitsIndexing();
+    }
+
+
+    // the following three methods for cache read/write granting shall be as loose
+    // as possible but also as strict as necessary to enable caching of most items
+
+    /**
+     * @return NULL if the answer is TRUE, in case of FALSE, the reason as
+     *         String is returned
+     */
+    public String shallStoreCacheForProxy() {
+
+        // check profile (disabled: we will check this in the plasmaSwitchboard)
+        // if (!this.profile.storeHTCache()) { return "storage_not_wanted"; }
+
+        // decide upon header information if a specific file should be stored to
+        // the cache or not
+        // if the storage was requested by prefetching, the request map is null
+
+        // check status code; a status can be shorter than 3 chars (the FTP loader passes "OK"), so clamp the prefix
+        if ((this.resInfo != null)
+                && (!this.resInfo.validResponseStatus(this.responseStatus))) {
+            return "bad_status_" + this.responseStatus.substring(0, Math.min(3, this.responseStatus.length()));
+        }
+
+        // check storage location
+        // sometimes a file name is equal to a path name in the same directory;
+        // or sometimes a file name is equal a directory name created earlier;
+        // we cannot match that here in the cache file path and therefore omit
+        // writing into the cache
+        if (this.cacheFile.getParentFile().isFile()
+                || this.cacheFile.isDirectory()) {
+            return "path_ambiguous";
+        }
+        if (this.cacheFile.toString().indexOf("..") >= 0) {
+            return "path_dangerous";
+        }
+        if (this.cacheFile.getAbsolutePath().length() > serverSystem.maxPathLength) {
+            return "path too long";
+        }
+
+        // -CGI access in request
+        // CGI access makes the page very individual, and therefore not usable
+        // in caches
+        if (this.url.isPOST() && !this.profile.crawlingQ()) {
+            return "dynamic_post";
+        }
+        if (this.url.isCGI()) {
+            return "dynamic_cgi";
+        }
+
+        if (this.resInfo != null) {
+            return this.resInfo.shallStoreCacheForProxy();
+        }
+
+        return null;
+    }
+
+    /**
+     * decide upon header information if a specific file should be taken from
+     * the cache or not
+     * 
+     * @return whether the file should be taken from the cache
+     */
+    public boolean shallUseCacheForProxy() {
+
+        // URLs flagged as POST or CGI yield very individual pages;
+        // such content is never taken from the cache.
+        final boolean dynamic = this.url.isPOST() || this.url.isCGI();
+        if (dynamic) {
+            return false;
+        }
+
+        // Without protocol specific resource information nothing speaks
+        // against using the cached copy.
+        if (this.resInfo == null) {
+            return true;
+        }
+
+        // Otherwise the resource information has the final say.
+        return this.resInfo.shallUseCacheForProxy();
+    }
+
+}
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index b0f7893b2..3a1323a30 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -400,9 +400,8 @@ public final class httpdProxyHandler {
             // 4. cache stale - refill - superfluous
             // in two of these cases we trigger a scheduler to handle newly arrived files:
             // case 1 and case 3
-            final plasmaHTCache.Entry cacheEntry = (cachedResponseHeader == null) ? null :
+            final httpdProxyCacheEntry cacheEntry = (cachedResponseHeader == null) ? null :
                 plasmaHTCache.newEntry(
-                    requestDate,                     // init date 
                     0,                               // crawling depth
                     url,                             // url
                     "",                              // name of the url is unknown
@@ -526,10 +525,8 @@ public final class httpdProxyHandler {
             }            
 
             // reserver cache entry
-            final Date requestDate = new Date(((Long)conProp.get(httpHeader.CONNECTION_PROP_REQUEST_START)).longValue());
             final IResourceInfo resInfo = new ResourceInfo(url,requestHeader,responseHeader);
-            final plasmaHTCache.Entry cacheEntry = plasmaHTCache.newEntry(
-                    requestDate, 
+            final httpdProxyCacheEntry cacheEntry = plasmaHTCache.newEntry(
                     0, 
                     url,
                     "",
diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java
index a632042ec..10b393667 100644
--- a/source/de/anomic/icap/icapd.java
+++ b/source/de/anomic/icap/icapd.java
@@ -40,6 +40,7 @@ import java.util.Properties;
 import de.anomic.http.HttpClient;
 import de.anomic.http.httpChunkedInputStream;
 import de.anomic.http.httpHeader;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
@@ -376,8 +377,7 @@ public class icapd implements serverHandler, Cloneable {
             
             // generating a htcache entry object
             final IResourceInfo resInfo = new ResourceInfo(httpRequestURL,httpReqHeader,httpResHeader);
-            final plasmaHTCache.Entry cacheEntry = plasmaHTCache.newEntry(
-                    new Date(),  
+            final httpdProxyCacheEntry cacheEntry = plasmaHTCache.newEntry(
                     0, 
                     httpRequestURL,
                     "",
diff --git a/source/de/anomic/kelondro/kelondroBLOBArray.java b/source/de/anomic/kelondro/kelondroBLOBArray.java
new file mode 100755
index 000000000..0bd2bb7d7
--- /dev/null
+++ b/source/de/anomic/kelondro/kelondroBLOBArray.java
@@ -0,0 +1,242 @@
+// kelondroBLOBArray.java
+// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 19.08.2008 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+package de.anomic.kelondro;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeMap;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+import de.anomic.server.serverDate;
+
+public class kelondroBLOBArray implements kelondroBLOB {
+
+    /*
+     * This class implements a BLOB using a set of kelondroBLOBHeap objects.
+     * In addition to what a kelondroBLOBHeap offers, this BLOB can delete large amounts of data using a given time limit.
+     * This is realized by creating separate BLOB files. New files are created when either
+     * - a given time limit is reached, or
+     * - a given space limit is reached.
+     * To organize such an array of BLOB files, the following file name structure is used:
+     * <BLOB-Name>/<YYYYMMDDhhmm>.blob
+     * That means all BLOB files are inside a directory that has the name of the BLOBArray.
+     * To delete content that is outdated, one special method is implemented that deletes content by a given
+     * time-out. Deletions are not made automatically; they must be triggered using this method.
+     */
+    
+    private int keylength;
+    private kelondroByteOrder ordering;
+    private File heapLocation;
+    private long maxage;
+    private long maxsize;
+    private List<blobItem> blobs;
+    
+    /** opens (or creates) the BLOBArray directory heapLocation and registers all blob files found in it, oldest first.
+     *  maxage (milliseconds) and maxsize (bytes) are the limits that make put() start a new blob file.
+     *  An IOException is thrown if the directory cannot be used, created or listed. */
+    public kelondroBLOBArray(
+            final File heapLocation,
+            final int keylength, final kelondroByteOrder ordering,
+            long maxage, long maxsize
+            ) throws IOException {
+        this.keylength = keylength;
+        this.ordering = ordering;
+        this.heapLocation = heapLocation;
+        this.maxage = maxage;
+        this.maxsize = maxsize;
+
+        // make sure that the heap directory exists and really is a directory
+        if (heapLocation.exists()) {
+            if (!heapLocation.isDirectory()) throw new IOException("the BLOBArray directory " + heapLocation.toString() + " cannot be used (is blocked by a file with same name)");
+        } else if (!heapLocation.mkdirs() && !heapLocation.isDirectory()) {
+            throw new IOException("the BLOBArray directory " + heapLocation.toString() + " cannot be created");
+        }
+
+        // register all blob files inside this directory (File.list() returns null if it cannot be read)
+        String[] files = heapLocation.list();
+        if (files == null) throw new IOException("the BLOBArray directory " + heapLocation.toString() + " cannot be listed");
+        Date d;
+        TreeMap<Long, blobItem> sortedItems = new TreeMap<Long, blobItem>();
+        File f;
+        for (int i = 0; i < files.length; i++) {
+            // NOTE(review): blobItem() names new files with formatShortSecond(); confirm that the length 17 / substring(0, 12) used here match that format
+            if (files[i].length() == 17 && files[i].endsWith("blob")) {
+               try {
+                   d = serverDate.parseShortSecond(files[i].substring(0, 12));
+               } catch (ParseException e) {continue;} // the name does not start with a parseable date: not one of our blob files
+               f = new File(heapLocation, files[i]);
+               sortedItems.put(Long.valueOf(d.getTime()), new blobItem(d, f, new kelondroBLOBHeap(f, keylength, ordering)));
+            }
+        }
+
+        // the TreeMap iterates in ascending key order, so the list ends up sorted by creation time
+        blobs = new CopyOnWriteArrayList<blobItem>(sortedItems.values());
+    }
+    
+    private class blobItem {
+        final Date creation;      // creation time of the blob file; parsed from or written into the file name
+        final File location;      // the blob file inside heapLocation
+        final kelondroBLOB blob;  // the opened blob that is stored in location
+        public blobItem(Date creation, File location, kelondroBLOB blob) {
+            this.creation = creation;
+            this.location = location;
+            this.blob = blob;
+        }
+        public blobItem() throws IOException {
+            // make a new blob file, named after the current time, and assign it in this item
+            this.creation = new Date();
+            this.location = new File(heapLocation, serverDate.formatShortSecond(creation) + ".blob");
+            this.blob = new kelondroBLOBHeap(location, keylength, ordering);
+        }
+    }
+    
+    /**
+     * reports the key length that was handed to the constructor of this array
+     * @return the length of a primary key
+     */
+    public int keylength() {
+        return keylength;
+    }
+    
+    /**
+     * clears the content of every blob and then empties the list of blobs
+     * @throws IOException if clearing one of the blobs fails
+     */
+    public void clear() throws IOException {
+        for (final blobItem item : blobs) { item.blob.clear(); }
+        blobs.clear(); // NOTE(review): the blobs are not closed and their files are not deleted here - confirm this is intended
+    }
+    
+    /**
+     * sums up the entry counts that the single blobs of this array report
+     * @return the total number of entries over all blobs
+     */
+    public int size() {
+        int total = 0;
+        for (final Iterator<blobItem> i = blobs.iterator(); i.hasNext();) total += i.next().blob.size();
+        return total;
+    }
+    
+    /**
+     * iterator over all keys: the key iterators of all blobs are combined into one iterator
+     * @param up        direction flag; handed to every blob and to the merge
+     * @param rotating  expected to be false; handed unchanged to every blob
+     * @return the result of kelondroMergeIterator.cascade() over the key iterators of all blobs
+     * @throws IOException
+     */
+    public kelondroCloneableIterator<byte[]> keys(boolean up, boolean rotating) throws IOException {
+        // only non-rotating iteration is expected here; this is checked when assertions are enabled
+        assert !rotating : "rotating key iteration is not expected here";
+        final List<kelondroCloneableIterator<byte[]>> c = new ArrayList<kelondroCloneableIterator<byte[]>>(blobs.size());
+        for (final blobItem bi : blobs) {
+            c.add(bi.blob.keys(up, rotating));
+        }
+        return kelondroMergeIterator.cascade(c, this.ordering, kelondroMergeIterator.simpleMerge, up);
+    }
+    
+    /**
+     * iterate over all keys: the key iterators of all blobs are combined into one iterator
+     * @param up        direction flag; handed to every blob and to the merge
+     * @param firstKey  handed unchanged to every blob
+     * @return the result of kelondroMergeIterator.cascade() over the key iterators of all blobs
+     * @throws IOException
+     */
+    public kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
+        final List<kelondroCloneableIterator<byte[]>> keyIterators = new ArrayList<kelondroCloneableIterator<byte[]>>(blobs.size());
+        for (final blobItem item : blobs) {
+            // collect one key iterator per blob
+            keyIterators.add(item.blob.keys(up, firstKey));
+        }
+        return kelondroMergeIterator.cascade(keyIterators, this.ordering, kelondroMergeIterator.simpleMerge, up);
+    }
+    
+    /**
+     * check if a specific key is stored in at least one of the blobs
+     * @param key  the primary key
+     * @return true as soon as one blob reports that it has the key, false if none does
+     * @throws IOException
+     */
+    public boolean has(byte[] key) throws IOException {
+        for (final Iterator<blobItem> i = blobs.iterator(); i.hasNext();) if (i.next().blob.has(key)) return true;
+        return false;
+    }
+    
+    /**
+     * retrieve the whole BLOB from the first blob of the array that holds the key
+     * @param key  the primary key
+     * @return the content found for the key, or null if no blob returns content for it
+     * @throws IOException
+     */
+    public byte[] get(byte[] key) throws IOException {
+        final Iterator<blobItem> i = blobs.iterator();
+        while (i.hasNext()) {
+            final byte[] content = i.next().blob.get(key);
+            if (content != null) return content; // first hit wins; the blobs are asked in list order
+        }
+        return null;
+    }
+    
+    /**
+     * write a whole byte array as BLOB into the newest blob file; a new file is started if none exists or the newest is older than maxage or larger than maxsize
+     * @param key  the primary key
+     * @param b    the content to be stored
+     * @throws IOException
+     */
+    public void put(byte[] key, byte[] b) throws IOException {
+        blobItem bi = (blobs.size() == 0) ? null : blobs.get(blobs.size() - 1);
+        // the age of the newest blob is (now - creation); it is this difference that is compared with maxage
+        if ((bi == null) || (System.currentTimeMillis() - bi.creation.getTime() > this.maxage) || (bi.location.length() > this.maxsize)) {
+            bi = new blobItem();
+            blobs.add(bi);
+        }
+        bi.blob.put(key, b); // NOTE(review): older blobs may still hold a previous value for key, and get() asks older blobs first
+    }
+    
+    /**
+     * remove a BLOB; the removal is requested from every blob of the array
+     * @param key  the primary key
+     * @throws IOException
+     */
+    public void remove(byte[] key) throws IOException {
+        for (final Iterator<blobItem> i = blobs.iterator(); i.hasNext();) i.next().blob.remove(key);
+    }
+    
+    /**
+     * close every blob of the array, then empty the blob list and set it to null
+     */
+    public void close() {
+        for (final Iterator<blobItem> i = blobs.iterator(); i.hasNext();) i.next().blob.close();
+        blobs.clear();
+        blobs = null;
+    }
+    
+}
diff --git a/source/de/anomic/kelondro/kelondroMergeIterator.java b/source/de/anomic/kelondro/kelondroMergeIterator.java
index a09bd86ae..e8e83254a 100644
--- a/source/de/anomic/kelondro/kelondroMergeIterator.java
+++ b/source/de/anomic/kelondro/kelondroMergeIterator.java
@@ -24,10 +24,10 @@ package de.anomic.kelondro;
 
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.util.Collection;
 import java.util.Comparator;
 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
-import java.util.Set;
 
 public class kelondroMergeIterator<E> implements kelondroCloneableIterator<E> {
     
@@ -119,7 +119,7 @@ public class kelondroMergeIterator<E> implements kelondroCloneableIterator<E> {
         throw new java.lang.UnsupportedOperationException("merge does not support remove");
     }
     
-    public static <A> kelondroCloneableIterator<A> cascade(final Set<kelondroCloneableIterator<A>> iterators, final kelondroOrder<A> c, final Method merger, final boolean up) {
+    public static <A> kelondroCloneableIterator<A> cascade(final Collection<kelondroCloneableIterator<A>> iterators, final kelondroOrder<A> c, final Method merger, final boolean up) {
         // this extends the ability to combine two iterators
         // to the ability of combining a set of iterators
         if (iterators == null) return null;
diff --git a/source/de/anomic/kelondro/kelondroSplitTable.java b/source/de/anomic/kelondro/kelondroSplitTable.java
index 682f79cd4..97d6d5ede 100644
--- a/source/de/anomic/kelondro/kelondroSplitTable.java
+++ b/source/de/anomic/kelondro/kelondroSplitTable.java
@@ -32,7 +32,6 @@ import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -380,21 +379,21 @@ public class kelondroSplitTable implements kelondroIndex {
     }
     
     public synchronized kelondroCloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
-        final HashSet<kelondroCloneableIterator<byte[]>> set = new HashSet<kelondroCloneableIterator<byte[]>>();
+        final List<kelondroCloneableIterator<byte[]>> c = new ArrayList<kelondroCloneableIterator<byte[]>>(tables.size());
         final Iterator<kelondroIndex> i = tables.values().iterator();
         while (i.hasNext()) {
-            set.add(i.next().keys(up, firstKey));
+            c.add(i.next().keys(up, firstKey));
         }
-        return kelondroMergeIterator.cascade(set, rowdef.objectOrder, kelondroMergeIterator.simpleMerge, up);
+        return kelondroMergeIterator.cascade(c, rowdef.objectOrder, kelondroMergeIterator.simpleMerge, up);
     }
     
     public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(final boolean up, final byte[] firstKey) throws IOException {
-        final HashSet<kelondroCloneableIterator<kelondroRow.Entry>> set = new HashSet<kelondroCloneableIterator<kelondroRow.Entry>>();
+        final List<kelondroCloneableIterator<kelondroRow.Entry>> c = new ArrayList<kelondroCloneableIterator<kelondroRow.Entry>>(tables.size());
         final Iterator<kelondroIndex> i = tables.values().iterator();
         while (i.hasNext()) {
-            set.add(i.next().rows(up, firstKey));
+            c.add(i.next().rows(up, firstKey));
         }
-        return kelondroMergeIterator.cascade(set, entryOrder, kelondroMergeIterator.simpleMerge, up);
+        return kelondroMergeIterator.cascade(c, entryOrder, kelondroMergeIterator.simpleMerge, up);
     }
 
     public final int cacheObjectChunkSize() {
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 0fa85d47f..c364c6ba8 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -42,7 +42,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.InetAddress;
 import java.util.Collections;
-import java.util.Date;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.SortedMap;
@@ -53,6 +52,7 @@ import java.util.regex.Pattern;
 
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.http.httpHeader;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.kelondro.kelondroBLOB;
 import de.anomic.kelondro.kelondroBLOBHeap;
 import de.anomic.kelondro.kelondroBLOBTree;
@@ -66,7 +66,6 @@ import de.anomic.server.serverCodings;
 import de.anomic.server.serverDomains;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverInstantBusyThread;
-import de.anomic.server.serverSystem;
 import de.anomic.server.serverThread;
 import de.anomic.server.logging.serverLog;
 import de.anomic.tools.enumerateFiles;
@@ -82,7 +81,7 @@ public final class plasmaHTCache {
     public  static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
 
     private static kelondroMap responseHeaderDB = null;
-    private static final ConcurrentLinkedQueue<Entry> cacheStack = new ConcurrentLinkedQueue<Entry>();
+    private static final ConcurrentLinkedQueue<httpdProxyCacheEntry> cacheStack = new ConcurrentLinkedQueue<httpdProxyCacheEntry>();
     private static final SortedMap<String, File> cacheAge = Collections.synchronizedSortedMap(new TreeMap<String, File>()); // a <date+hash, cache-path> - relation
     public static long curCacheSize = 0;
     public static long maxCacheSize = 0l;
@@ -93,19 +92,6 @@ public final class plasmaHTCache {
     private static ResourceInfoFactory objFactory = new ResourceInfoFactory();
     private static serverThread cacheScanThread = null;
 
-    // doctypes:
-    public static final char DT_PDFPS   = 'p';
-    public static final char DT_TEXT    = 't';
-    public static final char DT_HTML    = 'h';
-    public static final char DT_DOC     = 'd';
-    public static final char DT_IMAGE   = 'i';
-    public static final char DT_MOVIE   = 'm';
-    public static final char DT_FLASH   = 'f';
-    public static final char DT_SHARE   = 's';
-    public static final char DT_AUDIO   = 'a';
-    public static final char DT_BINARY  = 'b';
-    public static final char DT_UNKNOWN = 'u';
-
     // URL attributes
     public static final int UA_LOCAL    =  0; // URL was crawled locally
     public static final int UA_TILDE    =  1; // tilde appears in URL
@@ -115,76 +101,6 @@ public final class plasmaHTCache {
     public static final char LT_LOCAL   = 'L';
     public static final char LT_GLOBAL  = 'G';
 
-    // doctype calculation
-    public static char docType(final yacyURL url) {
-        final String path = url.getPath().toLowerCase();
-        // serverLog.logFinest("PLASMA", "docType URL=" + path);
-        char doctype = DT_UNKNOWN;
-        if (path.endsWith(".gif"))       { doctype = DT_IMAGE; }
-        else if (path.endsWith(".ico"))  { doctype = DT_IMAGE; }
-        else if (path.endsWith(".bmp"))  { doctype = DT_IMAGE; }
-        else if (path.endsWith(".jpg"))  { doctype = DT_IMAGE; }
-        else if (path.endsWith(".jpeg")) { doctype = DT_IMAGE; }
-        else if (path.endsWith(".png"))  { doctype = DT_IMAGE; }
-        else if (path.endsWith(".html")) { doctype = DT_HTML;  }
-        else if (path.endsWith(".txt"))  { doctype = DT_TEXT;  }
-        else if (path.endsWith(".doc"))  { doctype = DT_DOC;   }
-        else if (path.endsWith(".rtf"))  { doctype = DT_DOC;   }
-        else if (path.endsWith(".pdf"))  { doctype = DT_PDFPS; }
-        else if (path.endsWith(".ps"))   { doctype = DT_PDFPS; }
-        else if (path.endsWith(".avi"))  { doctype = DT_MOVIE; }
-        else if (path.endsWith(".mov"))  { doctype = DT_MOVIE; }
-        else if (path.endsWith(".qt"))   { doctype = DT_MOVIE; }
-        else if (path.endsWith(".mpg"))  { doctype = DT_MOVIE; }
-        else if (path.endsWith(".md5"))  { doctype = DT_SHARE; }
-        else if (path.endsWith(".mpeg")) { doctype = DT_MOVIE; }
-        else if (path.endsWith(".asf"))  { doctype = DT_FLASH; }
-        return doctype;
-    }
-
-    public static char docType(final String mime) {
-        // serverLog.logFinest("PLASMA", "docType mime=" + mime);
-        char doctype = DT_UNKNOWN;
-        if (mime == null) doctype = DT_UNKNOWN;
-        else if (mime.startsWith("image/")) doctype = DT_IMAGE;
-        else if (mime.endsWith("/gif")) doctype = DT_IMAGE;
-        else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE;
-        else if (mime.endsWith("/png")) doctype = DT_IMAGE;
-        else if (mime.endsWith("/html")) doctype = DT_HTML;
-        else if (mime.endsWith("/rtf")) doctype = DT_DOC;
-        else if (mime.endsWith("/pdf")) doctype = DT_PDFPS;
-        else if (mime.endsWith("/octet-stream")) doctype = DT_BINARY;
-        else if (mime.endsWith("/x-shockwave-flash")) doctype = DT_FLASH;
-        else if (mime.endsWith("/msword")) doctype = DT_DOC;
-        else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC;
-        else if (mime.endsWith("/postscript")) doctype = DT_PDFPS;
-        else if (mime.startsWith("text/")) doctype = DT_TEXT;
-        else if (mime.startsWith("image/")) doctype = DT_IMAGE;
-        else if (mime.startsWith("audio/")) doctype = DT_AUDIO;
-        else if (mime.startsWith("video/")) doctype = DT_MOVIE;
-        //bz2     = application/x-bzip2
-        //dvi     = application/x-dvi
-        //gz      = application/gzip
-        //hqx     = application/mac-binhex40
-        //lha     = application/x-lzh
-        //lzh     = application/x-lzh
-        //pac     = application/x-ns-proxy-autoconfig
-        //php     = application/x-httpd-php
-        //phtml   = application/x-httpd-php
-        //rss     = application/xml
-        //tar     = application/tar
-        //tex     = application/x-tex
-        //tgz     = application/tar
-        //torrent = application/x-bittorrent
-        //xhtml   = application/xhtml+xml
-        //xla     = application/msexcel
-        //xls     = application/msexcel
-        //xsl     = application/xml
-        //xml     = application/xml
-        //Z       = application/x-compress
-        //zip     = application/zip
-        return doctype;
-    }
     
     public static void init(final File htCachePath, final long CacheSizeMax) {
         
@@ -301,11 +217,11 @@ public final class plasmaHTCache {
         return responseHeaderDB.size();
     }
     
-    public static void push(final Entry entry) {
+    public static void push(final httpdProxyCacheEntry entry) {
         cacheStack.add(entry);
     }
 
-    public static Entry pop() {
+    public static httpdProxyCacheEntry pop() {
         return cacheStack.poll();
     }
 
@@ -889,8 +805,7 @@ public final class plasmaHTCache {
         return 0;           
     }
 
-    public static Entry newEntry(
-            final Date initDate, 
+    public static httpdProxyCacheEntry newEntry(
             final int depth, 
             final yacyURL url,
             final String name,
@@ -899,8 +814,7 @@ public final class plasmaHTCache {
             final String initiator,
             final CrawlProfile.entry profile
     ) {
-        final Entry entry = new Entry(
-                initDate, 
+        final httpdProxyCacheEntry entry = new httpdProxyCacheEntry(
                 depth, 
                 url,
                 name,
@@ -909,244 +823,28 @@ public final class plasmaHTCache {
                 initiator, 
                 profile
         );
-        return entry;
-    }
-
-    /**
-     * @return the responseHeaderDB
-     */
-    static kelondroMap getResponseHeaderDB() {
-        return responseHeaderDB;
-    }
-
-    public final static class Entry {
-
-    // the class objects
-    private final Date                     initDate;       // the date when the request happened; will be used as a key
-    private final int                      depth;          // the depth of prefetching
-    private final String                   responseStatus;    
-    private final File                     cacheFile;      // the cache file
-    private byte[]                   cacheArray;     // or the cache as byte-array
-    private final yacyURL                  url;
-    private final String                   name;           // the name of the link, read as anchor from an <a>-tag
-    private final Date                     lastModified;
-    private char                     doctype;
-    private final String                   language;
-    private final CrawlProfile.entry profile;
-    private final String                   initiator;
-    
-    /**
-     * protocolspecific information about the resource 
-     */
-    private final IResourceInfo            resInfo;
-
-    protected Entry clone() {
-        return new Entry(
-                this.initDate,
-                this.depth,
-                this.url,
-                this.name,
-                this.responseStatus,
-                this.resInfo,
-                this.initiator,
-                this.profile
-        );
-    }
-
-    public Entry(final Date initDate, 
-            final int depth, 
-            final yacyURL url,
-            final String name,
-            final String responseStatus,
-            final IResourceInfo resourceInfo,            
-            final String initiator,
-            final CrawlProfile.entry profile
-    ) {
-        if (resourceInfo == null){
-            System.out.println("Content information object is null. " + url);
-            System.exit(0);
-        }
-        this.resInfo = resourceInfo;
-        this.url              = url;
-        this.name             = name;
-        this.cacheFile        = getCachePath(this.url);
-        
-        // assigned:
-        this.initDate       = initDate;
-        this.depth          = depth;
-        this.responseStatus = responseStatus;
-        this.profile        = profile;
-        this.initiator      = (initiator == null) ? null : ((initiator.length() == 0) ? null : initiator);
-
-        // getting the last modified date
-        this.lastModified = resourceInfo.getModificationDate();
-        
-        // getting the doctype
-        this.doctype = docType(resourceInfo.getMimeType());
-        if (this.doctype == DT_UNKNOWN) this.doctype = docType(url);
-        this.language = yacyURL.language(url);
-
-        // to be defined later:
-        this.cacheArray     = null;
-        
-        writeResourceInfo();
-    }
-
-    public String name() {
-        // the anchor name; can be either the text inside the anchor tag or the page description after loading of the page
-        return this.name;
-    }
-    
-    public yacyURL url() {
-        return this.url;
-    }
-    
-    public String urlHash() {
-        return this.url.hash();
-    }
-    
-    public Date lastModified() {
-        return this.lastModified;
-    }
-    
-    public String language() {
-        return this.language;
-    }
-    
-    public CrawlProfile.entry profile() {
-        return this.profile;
-    }
-    
-    public String initiator() {
-        return this.initiator;
-    }
-    public boolean proxy() {
-        return initiator() == null;
-    }
-    public long size() {
-        if (this.cacheArray == null) return 0;
-        return this.cacheArray.length;
-    }
-
-    public int depth() {
-        return this.depth;
-    }
-    
-    public yacyURL referrerURL() {
-        return (this.resInfo == null) ? null : this.resInfo.getRefererUrl();
-    }
-
-    public File cacheFile() {
-        return this.cacheFile;
-    }
-    
-    public void setCacheArray(final byte[] data) {
-        this.cacheArray = data;
-    }
-    
-    public byte[] cacheArray() {
-        return this.cacheArray;
-    }
-    
-    public IResourceInfo getDocumentInfo() {
-        return this.resInfo;
-    }
-    
-    private boolean writeResourceInfo() {
-        if (this.resInfo == null) return false;
-        try {
+        if (docInfo != null) try {
             final HashMap<String, String> hm = new HashMap<String, String>();
-            hm.putAll(this.resInfo.getMap());
-            hm.put("@@URL", this.url.toNormalform(false, false));
-            hm.put("@@DEPTH", Integer.toString(this.depth));
-            if (this.initiator != null) hm.put("@@INITIATOR", this.initiator);
-            plasmaHTCache.getResponseHeaderDB().put(this.url.hash(), hm);
+            hm.putAll(docInfo.getMap());
+            hm.put("@@URL", url.toNormalform(false, false));
+            hm.put("@@DEPTH", Integer.toString(depth));
+            if (initiator != null)
+                hm.put("@@INITIATOR", initiator);
+            plasmaHTCache.getResponseHeaderDB().put(url.hash(), hm);
         } catch (final Exception e) {
-            log.logWarning("could not write ResourceInfo: "+ e.getClass() +": "+ e.getMessage());
+            plasmaHTCache.log.logWarning("could not write ResourceInfo: "
+                    + e.getClass() + ": " + e.getMessage());
             plasmaHTCache.resetResponseHeaderDB();
-            return false;
-        }
-        return true;
-    }    
-    
-    public String getMimeType() {
-        return (this.resInfo == null) ? null : this.resInfo.getMimeType();
-    }
-    
-    public Date ifModifiedSince() {
-        return (this.resInfo == null) ? null : this.resInfo.ifModifiedSince();
-    }
-    
-    public boolean requestWithCookie() {
-        return (this.resInfo == null) ? false : this.resInfo.requestWithCookie();
-    }
-    
-    public boolean requestProhibitsIndexing() {
-        return (this.resInfo == null) ? false : this.resInfo.requestProhibitsIndexing();
-    }
-    
-    /*
-    public boolean update() {
-        return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));
-    }
-    */
-
-    // the following three methods for cache read/write granting shall be as loose as possible
-    // but also as strict as necessary to enable caching of most items
-
-    /**
-     * @return NULL if the answer is TRUE, in case of FALSE, the reason as String is returned
-     */
-    public String shallStoreCacheForProxy() {
-
-        // check profile (disabled: we will check this in the plasmaSwitchboard)
-        //if (!this.profile.storeHTCache()) { return "storage_not_wanted"; }
-
-        // decide upon header information if a specific file should be stored to the cache or not
-        // if the storage was requested by prefetching, the request map is null
-
-        // check status code
-        if ((this.resInfo != null) && (!this.resInfo.validResponseStatus(this.responseStatus))) {
-            return "bad_status_" + this.responseStatus.substring(0,3);
         }
-        
-        // check storage location
-        // sometimes a file name is equal to a path name in the same directory;
-        // or sometimes a file name is equal a directory name created earlier;
-        // we cannot match that here in the cache file path and therefore omit writing into the cache
-        if (this.cacheFile.getParentFile().isFile() || this.cacheFile.isDirectory()) { return "path_ambiguous"; }
-        if (this.cacheFile.toString().indexOf("..") >= 0) { return "path_dangerous"; }
-        if (this.cacheFile.getAbsolutePath().length() > serverSystem.maxPathLength) { return "path too long"; }
-
-        // -CGI access in request
-        // CGI access makes the page very individual, and therefore not usable in caches
-        if (this.url.isPOST() && !this.profile.crawlingQ()) { return "dynamic_post"; }
-        if (this.url.isCGI()) { return "dynamic_cgi"; }
 
-        if (this.resInfo != null) {
-            return this.resInfo.shallStoreCacheForProxy();
-        }
-        
-        return null;
+        return entry;
     }
 
     /**
-     * decide upon header information if a specific file should be taken from the cache or not
-     * @return whether the file should be taken from the cache
+     * @return the responseHeaderDB
      */
-    public boolean shallUseCacheForProxy() {
-
-        // -CGI access in request
-        // CGI access makes the page very individual, and therefore not usable in caches
-        if (this.url.isPOST()) { return false; }
-        if (this.url.isCGI()) { return false; }
-        
-        if (this.resInfo != null) {
-            return this.resInfo.shallUseCacheForProxy();
-        }
-        
-        return true;
+    static kelondroMap getResponseHeaderDB() {
+        return responseHeaderDB;
     }
 
-    } // class Entry
 }
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 662d9fd1a..fb42c03a3 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -39,6 +39,7 @@ import java.util.TreeSet;
 import de.anomic.htmlFilter.htmlFilterImageEntry;
 import de.anomic.http.HttpClient;
 import de.anomic.http.httpHeader;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.index.indexURLReference;
 import de.anomic.index.indexWord;
 import de.anomic.kelondro.kelondroMScoreCluster;
@@ -284,7 +285,7 @@ public class plasmaSnippetCache {
                 // if not found try to download it
                 
                 // download resource using the crawler and keep resource in memory if possible
-                final plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing);
+                final httpdProxyCacheEntry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, true, reindexing);
                 
                 // getting resource metadata (e.g. the http headers for http resources)
                 if (entry != null) {
@@ -395,7 +396,7 @@ public class plasmaSnippetCache {
                 // if not found try to download it
                 
                 // download resource using the crawler and keep resource in memory if possible
-                final plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, forText, global);
+                final httpdProxyCacheEntry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, timeout, true, forText, global);
                 
                 // getting resource metadata (e.g. the http headers for http resources)
                 if (entry != null) {
@@ -853,7 +854,7 @@ public class plasmaSnippetCache {
                 // if the content is not available in cache try to download it from web
                 
                 // try to download the resource using a crawler
-                final plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true, forText, reindexing);
+                final httpdProxyCacheEntry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout, true, forText, reindexing);
                 if (entry == null) return null; // not found in web
                 
                 // read resource body (if it is there)
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 5e1768d6a..6a2f8716d 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -136,6 +136,7 @@ import de.anomic.http.JakartaCommonsHttpClient;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRemoteProxyConfig;
 import de.anomic.http.httpd;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.http.httpdRobotsTxtConfig;
 import de.anomic.index.indexReferenceBlacklist;
 import de.anomic.index.indexURLReference;
@@ -965,7 +966,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         return this.webIndex.cleanProfiles();
     }
     
-    public boolean htEntryStoreProcess(final plasmaHTCache.Entry entry) {
+    public boolean htEntryStoreProcess(final httpdProxyCacheEntry entry) {
         
         if (entry == null) return false;
 
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 42e7342a1..22e728e42 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -40,6 +40,7 @@ import java.util.TreeSet;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.IndexingStack;
 import de.anomic.htmlFilter.htmlFilterContentScraper;
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.index.indexCollectionRI;
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexContainerOrder;
@@ -819,7 +820,7 @@ public final class plasmaWordIndex implements indexRI {
                 new byte[0],                               // md5
                 (int) entry.size(),                        // size
                 condenser.RESULT_NUMB_WORDS,               // word count
-                plasmaHTCache.docType(document.dc_format()), // doctype
+                httpdProxyCacheEntry.docType(document.dc_format()), // doctype
                 condenser.RESULT_FLAGS,                    // flags
                 yacyURL.language(entry.url()),             // language
                 document.inboundLinks(),                   // inbound links
@@ -842,7 +843,7 @@ public final class plasmaWordIndex implements indexRI {
                 document,                                     // document content
                 condenser,                                    // document condenser
                 yacyURL.language(entry.url()),                // document language
-                plasmaHTCache.docType(document.dc_format()),  // document type
+                httpdProxyCacheEntry.docType(document.dc_format()),  // document type
                 document.inboundLinks(),                      // inbound links
                 document.outboundLinks()                      // outbound links
         );
diff --git a/source/de/anomic/ymage/ymageOSM.java b/source/de/anomic/ymage/ymageOSM.java
index e05375ceb..d5195a3d9 100644
--- a/source/de/anomic/ymage/ymageOSM.java
+++ b/source/de/anomic/ymage/ymageOSM.java
@@ -35,6 +35,7 @@ import java.net.MalformedURLException;
 
 import javax.imageio.ImageIO;
 
+import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.yacy.yacyURL;
@@ -78,7 +79,7 @@ public class ymageOSM {
         InputStream tileStream = plasmaHTCache.getResourceContentStream(tileURL);
         if (tileStream == null) {
             // download resource using the crawler and keep resource in memory if possible
-            final plasmaHTCache.Entry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false, false);
+            final httpdProxyCacheEntry entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false, false);
             if ((entry == null) || (entry.cacheArray() == null)) return null;
             tileStream = new ByteArrayInputStream(entry.cacheArray());
         }