From dae763d8e3df0e5281bbc7018577db8da06f6ea1 Mon Sep 17 00:00:00 2001
From: theli <theli@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 6 Sep 2006 14:31:17 +0000
Subject: [PATCH] git-svn-id:
 https://svn.berlios.de/svnroot/repos/yacy/trunk@2495
 6c8d7289-2bf4-0310-a012-ef5d649a1542

---
 htroot/CacheAdmin_p.java                      |   7 +-
 htroot/ViewFile.html                          |   2 +
 htroot/ViewFile.java                          | 309 ++++++------
 source/de/anomic/http/httpdProxyHandler.java  |  19 +-
 source/de/anomic/icap/icapd.java              |   8 +-
 .../de/anomic/plasma/cache/IResourceInfo.java | 136 +++++
 .../plasma/cache/ResourceInfoFactory.java     |  86 ++++
 .../plasma/cache/http/ResourceInfo.java       | 467 ++++++++++++++++++
 .../plasma/crawler/http/CrawlWorker.java      |  14 +-
 .../de/anomic/plasma/plasmaCrawlStacker.java  |   2 +-
 source/de/anomic/plasma/plasmaHTCache.java    | 380 +++++---------
 .../de/anomic/plasma/plasmaSnippetCache.java  |  29 +-
 .../de/anomic/plasma/plasmaSwitchboard.java   |   9 +-
 .../anomic/plasma/plasmaSwitchboardQueue.java | 152 ++----
 14 files changed, 1057 insertions(+), 563 deletions(-)
 create mode 100644 source/de/anomic/plasma/cache/IResourceInfo.java
 create mode 100644 source/de/anomic/plasma/cache/ResourceInfoFactory.java
 create mode 100644 source/de/anomic/plasma/cache/http/ResourceInfo.java
diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java
index 77bed6ae1..84d7f69d7 100644
--- a/htroot/CacheAdmin_p.java
+++ b/htroot/CacheAdmin_p.java
@@ -59,6 +59,7 @@ import de.anomic.index.indexURL;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParserDocument;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.cache.IResourceInfo;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverObjects;
@@ -102,8 +103,8 @@ public class CacheAdmin_p {
 
             info.ensureCapacity(40000);
             try {
-                final httpHeader fileheader = switchboard.cacheManager.getCachedResponse(indexURL.urlHash(url));
-                info.append("<b>HTTP Header:</b><br>").append(formatHeader(fileheader)).append("<br>");
+                final IResourceInfo resInfo = switchboard.cacheManager.loadResourceInfo(url);
+                info.append("<b>HTTP Header:</b><br>").append(formatHeader(resInfo.getMap())).append("<br>");
                 final String ff = file.toString();
                 final int dotpos = ff.lastIndexOf('.');
                 final String ext = (dotpos >= 0) ? ff.substring(dotpos + 1).toLowerCase() : "";
@@ -198,7 +199,7 @@ public class CacheAdmin_p {
         return new String(s);
     }
     
-    private static String formatHeader(httpHeader header) {
+    private static String formatHeader(Map header) {
         final StringBuffer result = new StringBuffer(2048);
         if (header == null) {
             result.append("- no header in header cache -<br>");
diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html
index 648192f1b..87830d891 100644
--- a/htroot/ViewFile.html
+++ b/htroot/ViewFile.html
@@ -56,6 +56,8 @@ Invalid URL
 Unable to download resource content.
 :: <!-- 5 -->
 Unable to parse resource content.
+:: <!-- 6 -->
+Unsupported protocol.
 #(/error)#
 </font>
 </p>
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 3ae7bff55..f1b7b06a5 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -4,23 +4,23 @@
 //(C) by Michael Peter Christen; mc@anomic.de
 //first published on http://www.anomic.de
 //Frankfurt, Germany, 2004
-//
+
 //last major change: 12.07.2004
-//
+
 //This program is free software; you can redistribute it and/or modify
 //it under the terms of the GNU General Public License as published by
 //the Free Software Foundation; either version 2 of the License, or
 //(at your option) any later version.
-//
+
 //This program is distributed in the hope that it will be useful,
 //but WITHOUT ANY WARRANTY; without even the implied warranty of
 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 //GNU General Public License for more details.
-//
+
 //You should have received a copy of the GNU General Public License
 //along with this program; if not, write to the Free Software
 //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//
+
 //Using this software in any meaning (reading, learning, copying, compiling,
 //running) means that you agree that the Author(s) is (are) not responsible
 //for cost, loss of data or any harm that may be caused directly or indirectly
@@ -32,7 +32,7 @@
 //(are) also not responsible for proper configuration and usage of the
 //software, even if provoked by documentation provided together with
 //the software.
-//
+
 //Any changes to this file according to the GPL as documented in the file
 //gpl.txt aside this file in the shipment you received can be done to the
 //lines that follows this copyright notice here, but changes must not be
@@ -56,18 +56,19 @@ import de.anomic.http.httpc;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParserDocument;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.cache.IResourceInfo;
 import de.anomic.plasma.plasmaCrawlLURL.Entry;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 
 public class ViewFile {
- 
+
     public static final int VIEW_MODE_NO_TEXT = 0;
     public static final int VIEW_MODE_AS_PLAIN_TEXT = 1;
     public static final int VIEW_MODE_AS_PARSED_TEXT = 2;
     public static final int VIEW_MODE_AS_PARSED_SENTENCES = 3;
     public static final int VIEW_MODE_AS_IFRAME = 4;
-    
+
     public static final String[] highlightingColors = new String[] {
         "255,255,100",
         "255,155,155",
@@ -78,12 +79,12 @@ public class ViewFile {
     };
 
     public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
-        
+
         serverObjects prop = new serverObjects();
         plasmaSwitchboard sb = (plasmaSwitchboard)env;     
-        
 
-        
+
+
         if (post.containsKey("words"))
             try {
                 prop.put("error_words",URLEncoder.encode((String) post.get("words"), "UTF-8"));
@@ -91,148 +92,168 @@ public class ViewFile {
                 // TODO Auto-generated catch block
                 e1.printStackTrace();
             }
-        
-        if (post != null) {
-            // getting the url hash from which the content should be loaded
-            String urlHash = post.get("urlHash","");       
-            if (urlHash.equals("")) {
-                prop.put("error",1);
-                prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                return prop;
-            }
-            
-            String viewMode = post.get("viewMode","sentences");
-            
-            // getting the urlEntry that belongs to the url hash
-            Entry urlEntry = null;
-            try {
-                urlEntry = sb.urlPool.loadedURL.getEntry(urlHash, null);
-            } catch (IOException e) {
-                prop.put("error",2);
-                prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                return prop;
-            }            
-            
-            // gettin the url that belongs to the entry
-            URL url = urlEntry.url();
-            if (url == null) {
-                prop.put("error",3);
-                prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                return prop;
-            }    
-            
-            // loading the resource content as byte array
-            byte[] resource = null;
-            httpHeader resHeader = null;
-            String resMime = null;
-            try {
-                resource = sb.cacheManager.loadResource(url);
-                if (resource == null) {
-                    plasmaHTCache.Entry entry = sb.snippetCache.loadResourceFromWeb(url, 5000);                 
-                    
-                    if (entry != null) {
-                        resHeader = entry.responseHeader();
-                    }                    
-                    
-                    resource = sb.cacheManager.loadResource(url);
-                    if (resource == null) {
-                        prop.put("error",4);
-                        prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                        return prop;
-                    } 
+
+            if (post != null) {
+                // getting the url hash from which the content should be loaded
+                String urlHash = post.get("urlHash","");       
+                if (urlHash.equals("")) {
+                    prop.put("error",1);
+                    prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                    return prop;
                 }
-                if (resHeader == null) {
-                    resHeader = sb.cacheManager.getCachedResponse(urlEntry.hash());
-                    if (resHeader == null) {
-                        resHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig);
+
+                String viewMode = post.get("viewMode","sentences");
+
+                // getting the urlEntry that belongs to the url hash
+                Entry urlEntry = null;
+                try {
+                    urlEntry = sb.urlPool.loadedURL.getEntry(urlHash, null);
+                } catch (IOException e) {
+                    prop.put("error",2);
+                    prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                    return prop;
+                }            
+
+                // gettin the url that belongs to the entry
+                URL url = urlEntry.url();
+                if (url == null) {
+                    prop.put("error",3);
+                    prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                    return prop;
+                }    
+
+                // loading the resource content as byte array
+                byte[] resource = null;
+                IResourceInfo resInfo = null;
+                String resMime = null;
+                try {
+                    // trying to load the resource body
+                    resource = sb.cacheManager.loadResourceContent(url);
+
+                    // if the resource body was not cached we try to load it from web
+                    if (resource == null) {
+                        plasmaHTCache.Entry entry = sb.snippetCache.loadResourceFromWeb(url, 5000);                 
+
+                        if (entry != null) {
+                            resInfo = entry.getDocumentInfo();
+                            resource = sb.cacheManager.loadResourceContent(url);
+                        }
+
                         if (resource == null) {
                             prop.put("error",4);
                             prop.put("viewMode",VIEW_MODE_NO_TEXT);
                             return prop;
                         } 
-                        resMime = resHeader.mime();
                     }
-                }
-            } catch (IOException e) {
-                if (url == null) {
-                    prop.put("error",4);
-                    prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                    return prop;
-                }   
-            }    
-            if (viewMode.equals("plain")) {                
-                String content = new String(resource);
-                content = content.replaceAll("<","&lt;")
-                                 .replaceAll(">","&gt;")
-                                 .replaceAll("\"","&quot;")
-                                 .replaceAll("\n","<br>")
-                                 .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
-                
-                prop.put("error",0);
-                prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
-                prop.put("viewMode_plainText",content);                     
-            } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
-                // parsing the resource content
-                plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource,resHeader);
-                if (document == null) {
-                    prop.put("error",5);
-                    prop.put("viewMode",VIEW_MODE_NO_TEXT);
-                    return prop;                
-                }
-                resMime = document.getMimeType();
-                
-                if (viewMode.equals("parsed")) {
-                    String content = new String(document.getText());
-		    content = wikiCode.replaceHTML(content); //added by Marc Nause
-                    content = content.replaceAll("\n","<br>")
-                                     .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
-                    
-                    prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
-                    prop.put("viewMode_parsedText",content);
-                } else if (viewMode.equals("iframe")) {
-                    prop.put("viewMode",VIEW_MODE_AS_IFRAME);
-                    prop.put("viewMode_url",url.toString());
-                } else {
-                    prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
-                    String[] sentences = document.getSentences();
-                    
-                    boolean dark = true;
-                    for (int i=0; i < sentences.length; i++) {
-                        String currentSentence = wikiCode.replaceHTML(sentences[i]);
-                        
-                        // Search word highlighting
-                        String words = post.get("words",null);
-                        if (words != null) {
-                            try {
-                                words = URLDecoder.decode(words,"UTF-8");
-                            } catch (UnsupportedEncodingException e) {}
-                            
-                            String[] wordArray = words.substring(1,words.length()-1).split(",");
-                            for (int j=0; j < wordArray.length; j++) {
-                                String currentWord = wordArray[j].trim(); 
-                                currentSentence = currentSentence.replaceAll(currentWord,
-                                        "<b style=\"color: black; background-color: rgb(" + highlightingColors[j%6] + ");\">" + currentWord + "</b>");
+
+                    // try to load resource metadata
+                    if (resInfo == null) {
+
+                        // try to load the metadata from cache
+                        try {
+                            resInfo = sb.cacheManager.loadResourceInfo(urlEntry.url());
+                        } catch (Exception e) { /* ignore this */}
+
+                        // if the metadata were not cached, try to load them from the web
+                        if (resInfo == null) {
+                            String protocol = url.getProtocol();
+                            if (!((protocol.equals("http") || protocol.equals("https")))) {
+                                prop.put("error",6);
+                                prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                                return prop;                                
                             }
+
+                            httpHeader responseHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig);
+                            if (responseHeader == null) {
+                                prop.put("error",4);
+                                prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                                return prop;
+                            } 
+                            resMime = responseHeader.mime();
                         }
-                        
-                        prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1)); 
-                        prop.put("viewMode_sentences_" + i + "_text",currentSentence);   
-                        prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark;
+                    } else {
+                        resMime = resInfo.getMimeType();
                     }
-                    prop.put("viewMode_sentences",sentences.length);
-                                     
-                } 
-            }
-            prop.put("error",0);
-            prop.put("error_url",url.toString());                
-            prop.put("error_hash",urlHash);
-            prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
-            prop.put("error_desc",urlEntry.descr());
-            prop.put("error_size",urlEntry.size());
-            prop.put("error_mimeType",resMime);
-        }        
-        
-        return prop;
+                } catch (IOException e) {
+                    if (url == null) {
+                        prop.put("error",4);
+                        prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                        return prop;
+                    }   
+                }    
+                if (viewMode.equals("plain")) {                
+                    String content = new String(resource);
+                    content = content.replaceAll("<","&lt;")
+                    .replaceAll(">","&gt;")
+                    .replaceAll("\"","&quot;")
+                    .replaceAll("\n","<br>")
+                    .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
+
+                    prop.put("error",0);
+                    prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
+                    prop.put("viewMode_plainText",content);                     
+                } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
+                    // parsing the resource content
+                    plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource,resInfo);
+                    if (document == null) {
+                        prop.put("error",5);
+                        prop.put("viewMode",VIEW_MODE_NO_TEXT);
+                        return prop;                
+                    }
+                    resMime = document.getMimeType();
+
+                    if (viewMode.equals("parsed")) {
+                        String content = new String(document.getText());
+                        content = wikiCode.replaceHTML(content); //added by Marc Nause
+                        content = content.replaceAll("\n","<br>")
+                        .replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
+
+                        prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
+                        prop.put("viewMode_parsedText",content);
+                    } else if (viewMode.equals("iframe")) {
+                        prop.put("viewMode",VIEW_MODE_AS_IFRAME);
+                        prop.put("viewMode_url",url.toString());
+                    } else {
+                        prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
+                        String[] sentences = document.getSentences();
+
+                        boolean dark = true;
+                        for (int i=0; i < sentences.length; i++) {
+                            String currentSentence = wikiCode.replaceHTML(sentences[i]);
+
+                            // Search word highlighting
+                            String words = post.get("words",null);
+                            if (words != null) {
+                                try {
+                                    words = URLDecoder.decode(words,"UTF-8");
+                                } catch (UnsupportedEncodingException e) {}
+
+                                String[] wordArray = words.substring(1,words.length()-1).split(",");
+                                for (int j=0; j < wordArray.length; j++) {
+                                    String currentWord = wordArray[j].trim(); 
+                                    currentSentence = currentSentence.replaceAll(currentWord,
+                                            "<b style=\"color: black; background-color: rgb(" + highlightingColors[j%6] + ");\">" + currentWord + "</b>");
+                                }
+                            }
+
+                            prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1)); 
+                            prop.put("viewMode_sentences_" + i + "_text",currentSentence);   
+                            prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark;
+                        }
+                        prop.put("viewMode_sentences",sentences.length);
+
+                    } 
+                }
+                prop.put("error",0);
+                prop.put("error_url",url.toString());                
+                prop.put("error_hash",urlHash);
+                prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
+                prop.put("error_desc",urlEntry.descr());
+                prop.put("error_size",urlEntry.size());
+                prop.put("error_mimeType",resMime);
+            }        
+
+            return prop;
     }
-    
+
 }
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index d6315ed1a..bf703b1f6 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -96,6 +96,8 @@ import de.anomic.index.indexURL;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.plasma.cache.http.ResourceInfo;
 import de.anomic.plasma.urlPattern.plasmaURLPattern;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverFileUtils;
@@ -413,8 +415,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
             
             // decide wether to use a cache entry or connect to the network
             File cacheFile = cacheManager.getCachePath(url);
-            String urlHash = indexURL.urlHash(url);
-            httpHeader cachedResponseHeader = cacheManager.getCachedResponse(urlHash);
+            ResourceInfo cachedResInfo = (ResourceInfo) cacheManager.loadResourceInfo(url);
+            httpHeader cachedResponseHeader = (cachedResInfo == null)?null:cachedResInfo.getResponseHeader();
             boolean cacheExists = ((cacheFile.isFile()) && (cachedResponseHeader != null));
             
             // why are files unzipped upon arrival? why not zip all files in cache?
@@ -445,9 +447,10 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
                     0,                               // crawling depth
                     url,                             // url
                     "",                        // name of the url is unknown
-                    requestHeader,                   // request headers
+                    //requestHeader,                   // request headers
                     "200 OK",                        // request status
-                    cachedResponseHeader,            // response headers
+                    //cachedResponseHeader,            // response headers
+                    cachedResInfo,
                     null,                            // initiator
                     switchboard.defaultProxyProfile  // profile
             );
@@ -579,15 +582,17 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
             }            
 
             // reserver cache entry
-            Date requestDate = new Date(((Long)conProp.get(httpHeader.CONNECTION_PROP_REQUEST_START)).longValue()); 
+            Date requestDate = new Date(((Long)conProp.get(httpHeader.CONNECTION_PROP_REQUEST_START)).longValue());
+            IResourceInfo resInfo = new ResourceInfo(url,requestHeader,res.responseHeader);
             plasmaHTCache.Entry cacheEntry = cacheManager.newEntry(
                     requestDate, 
                     0, 
                     url,
                     "",
-                    requestHeader, 
+                    //requestHeader, 
                     res.status, 
-                    res.responseHeader, 
+                    //res.responseHeader,
+                    resInfo,
                     null, 
                     switchboard.defaultProxyProfile
             );
diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java
index de4ea546f..648d0fd2b 100644
--- a/source/de/anomic/icap/icapd.java
+++ b/source/de/anomic/icap/icapd.java
@@ -64,6 +64,8 @@ import de.anomic.http.httpc;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.plasma.cache.http.ResourceInfo;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverHandler;
@@ -385,14 +387,14 @@ public class icapd implements serverHandler {
              * ========================================================================= */
             
             // generating a htcache entry object
+            IResourceInfo resInfo = new ResourceInfo(httpRequestURL,httpReqHeader,httpResHeader);
             plasmaHTCache.Entry cacheEntry = cacheManager.newEntry(
                     new Date(),  
                     0, 
                     httpRequestURL,
                     "",
-                    httpReqHeader, 
-                    httpRespStatusLine, 
-                    httpResHeader, 
+                    httpRespStatusLine,
+                    resInfo,
                     null, 
                     switchboard.defaultProxyProfile
             );
diff --git a/source/de/anomic/plasma/cache/IResourceInfo.java b/source/de/anomic/plasma/cache/IResourceInfo.java
new file mode 100644
index 000000000..72c344933
--- /dev/null
+++ b/source/de/anomic/plasma/cache/IResourceInfo.java
@@ -0,0 +1,136 @@
+// IResourceInfo.java 
+// -------------------------------------
+// part of YACY
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2006
+//
+// This file ist contributed by Martin Thelian
+//
+// $LastChangedDate: 2006-02-20 23:57:42 +0100 (Mo, 20 Feb 2006) $
+// $LastChangedRevision: 1715 $
+// $LastChangedBy: theli $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+
+package de.anomic.plasma.cache;
+
+import java.util.Date;
+import java.util.Map;
+
+import de.anomic.net.URL;
+
+public interface IResourceInfo {
+    
+    /**
+     * Returns the resource information as a map
+     * @return the resource information as a map
+     */
+    public Map getMap();
+    
+    /**
+     * Returns the URL of this content
+     * @return the URL of this content
+     */
+    public URL getUrl();
+    
+    /**
+     * Returns the referer URL of this URL
+     * @return referer URL
+     */
+    public URL getRefererUrl();
+    
+    /**
+     * Returns the mimetype of the cached object
+     * @return mimetype
+     */
+    public String getMimeType();
+    
+    /**
+     * Returns the modification date of the cached object
+     * @return the modification date
+     */
+    public Date getModificationDate();
+    
+    /**
+     * Returns the url hash of the content URL
+     * @return the url hash of the content URL
+     */
+    public String getUrlHash();
+    
+    /**
+     * Specifies whether the resource was requested with an
+     * if-modified-since date
+     * @return the if-modified-since date of the request (presumably null if none was sent; TODO confirm)
+     */
+    public Date ifModifiedSince();
+    
+    /**
+     * Specifies whether the resource was requested with
+     * client specific information (e.g. cookies for http)
+     * @return true if the request carried client specific information
+     */
+    public boolean requestWithCookie();
+    
+    /**
+     * Specifies whether the request prohibits indexing
+     * @return true if the request prohibits indexing
+     */
+    public boolean requestProhibitsIndexing();
+    
+    /**
+     * Determines if a resource that was downloaded by the crawler
+     * is allowed to be indexed.
+     *  
+     * @return an error string describing the reason why the
+     * resource should not be indexed or null if indexing is allowed
+     */
+    public String shallIndexCacheForCrawler();
+    
+    /**
+     * Determines if a resource that was downloaded by the proxy
+     * is allowed to be indexed.
+     *  
+     * @return an error string describing the reason why the
+     * resource should not be indexed or null if indexing is allowed
+     */    
+    public String shallIndexCacheForProxy();
+    
+    public String shallStoreCacheForProxy();
+    public boolean shallUseCacheForProxy();
+    
+    public boolean validResponseStatus(String responseStatus);
+}
diff --git a/source/de/anomic/plasma/cache/ResourceInfoFactory.java b/source/de/anomic/plasma/cache/ResourceInfoFactory.java
new file mode 100644
index 000000000..75c0a2d07
--- /dev/null
+++ b/source/de/anomic/plasma/cache/ResourceInfoFactory.java
@@ -0,0 +1,86 @@
+// RespourceInfoFactory.java 
+// -------------------------------------
+// part of YACY
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2006
+//
+// This file ist contributed by Martin Thelian
+//
+// $LastChangedDate: 2006-02-20 23:57:42 +0100 (Mo, 20 Feb 2006) $
+// $LastChangedRevision: 1715 $
+// $LastChangedBy: theli $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+
+package de.anomic.plasma.cache;
+
+import java.lang.reflect.Constructor;
+import java.util.Map;
+
+import de.anomic.net.URL;
+
+public class ResourceInfoFactory {
+    /**
+     * Builds the protocol specific {@link IResourceInfo} object for a resource.
+     * The class "package of this factory"."url protocol".ResourceInfo
+     * (e.g. de.anomic.plasma.cache.http.ResourceInfo for http urls)
+     * is loaded by name and created through its (URL, Map) constructor.
+     *
+     * @param resourceURL the url of the resource; its protocol selects the class
+     * @param resourceMetadata the metadata passed on to the constructor
+     * @return the newly created info object
+     * @throws Exception if the class cannot be loaded or instantiated
+     */
+    public IResourceInfo buildResourceInfoObj(
+            URL resourceURL,
+            Map resourceMetadata
+    ) throws Exception {
+        // the protocol of the url selects the implementation
+        final String protocol = resourceURL.getProtocol();
+
+        // the fully qualified class name
+        final String packageName = this.getClass().getPackage().getName();
+        final String implName = packageName + "." + protocol + ".ResourceInfo";
+
+        // look up the (URL, Map) constructor of the implementation class
+        final Class[] signature = new Class[] { URL.class, Map.class };
+        final Constructor ctor = Class.forName(implName).getConstructor(signature);
+
+        // create the instance and hand it back
+        final Object[] arguments = new Object[] { resourceURL, resourceMetadata };
+        return (IResourceInfo) ctor.newInstance(arguments);
+    }
+}
diff --git a/source/de/anomic/plasma/cache/http/ResourceInfo.java b/source/de/anomic/plasma/cache/http/ResourceInfo.java
new file mode 100644
index 000000000..3b1c2d4b3
--- /dev/null
+++ b/source/de/anomic/plasma/cache/http/ResourceInfo.java
@@ -0,0 +1,467 @@
+// ResourceInfo.java 
+// -------------------------------------
+// part of YACY
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2006
+//
+// This file ist contributed by Martin Thelian
+//
+// $LastChangedDate: 2006-02-20 23:57:42 +0100 (Mo, 20 Feb 2006) $
+// $LastChangedRevision: 1715 $
+// $LastChangedBy: theli $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+
+package de.anomic.plasma.cache.http;
+
+import java.util.Date;
+import java.util.Map;
+
+import de.anomic.http.httpHeader;
+import de.anomic.index.indexURL;
+import de.anomic.net.URL;
+import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.cache.ResourceInfoFactory;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.server.serverDate;
+
+public class ResourceInfo implements IResourceInfo {
+    private URL url;
+    private String urlHash;
+    private httpHeader responseHeader;
+    private httpHeader requestHeader;
+    
+    /**
+     * Constructor used by the {@link ResourceInfoFactory}
+     * @param objectURL the url of the resource, must not be null
+     * @param objectInfo the stored metadata the response header is built from, must not be null
+     */
+    public ResourceInfo(URL objectURL, Map objectInfo) {
+        if (objectURL == null) throw new NullPointerException();
+        if (objectInfo == null) throw new NullPointerException();
+        
+        // generating the url hash
+        this.url = objectURL;
+        this.urlHash = indexURL.urlHash(this.url.toNormalform());
+        
+        // create the http header object (no request header is set by this constructor)
+        this.responseHeader =  new httpHeader(null, objectInfo);
+    }
+
+    public ResourceInfo(URL objectURL, httpHeader requestHeaders, httpHeader responseHeaders) {
+        if (objectURL == null) throw new NullPointerException();
+        if (responseHeaders == null) throw new NullPointerException();  
+        
+        // generating the url hash
+        this.url = objectURL;
+        this.urlHash = indexURL.urlHash(this.url.toNormalform());
+        
+        this.requestHeader = requestHeaders; // not null-checked, may be null
+        this.responseHeader = responseHeaders;
+    }
+    
+    public Map getMap() {
+        return this.responseHeader; // the response header object itself, not a copy
+    }
+    
+    /**
+     * Returns the trimmed, lower-cased mime type of the response without parameters.
+     * @see de.anomic.plasma.cache.IResourceInfo#getMimeType()
+     */
+    public String getMimeType() {
+        if (this.responseHeader == null) return null;
+        // normalize first, then cut off everything from the first ';' on
+        final String mime = this.responseHeader.mime().trim().toLowerCase();
+        final int semicolon = mime.indexOf(';');
+        if (semicolon < 0) return mime;
+        return mime.substring(0, semicolon);
+    }
+
+    /**
+     * Returns lastModified() of the response header, falling back to its
+     * date() value and finally to the current corrected UTC time.
+     * @see de.anomic.plasma.cache.IResourceInfo#getModificationDate()
+     */
+    public Date getModificationDate() {
+        if (this.responseHeader != null) {
+            final Date modified = this.responseHeader.lastModified();
+            if (modified != null) return modified;
+            final Date received = this.responseHeader.date();
+            if (received != null) return received;
+        }
+        return new Date(serverDate.correctedUTCTime());
+    }
+    
+    public URL getRefererUrl() { // null without request header or if the referer cannot be turned into a URL
+        if (this.requestHeader == null) return null;
+        URL referer = null;
+        try {
+            referer = new URL((String) this.requestHeader.get(httpHeader.REFERER, ""));
+        } catch (Exception e) { /* any failure simply means: no referer url */ }
+        return referer;
+    }
+    
+    /** Returns the url this object was constructed with.
+     * @see de.anomic.plasma.cache.IResourceInfo#getUrl()
+     */
+    public URL getUrl() {
+        return this.url;
+    }
+    
+    /** Returns the hash that the constructor computed from the normal form of the url.
+     * @see de.anomic.plasma.cache.IResourceInfo#getUrlHash()
+     */    
+    public String getUrlHash() {
+        return this.urlHash;
+    }
+
+    /** Rejects pictures and everything plasmaHTCache.isText does not accept; null means indexing is allowed.
+     * @see de.anomic.plasma.cache.IResourceInfo#shallIndexCacheForCrawler()
+     */
+    public String shallIndexCacheForCrawler() {
+        final String mime = this.getMimeType();
+        if (plasmaHTCache.isPicture(mime)) return "Media_Content_(Picture)";
+        if (plasmaHTCache.isText(mime)) return null;
+        return "Media_Content_(not_text)";
+    }
+
+    /**
+     * Determines if a resource that was loaded through the proxy may be indexed.
+     * @return null if indexing is allowed, otherwise a string naming the reason
+     * @see de.anomic.plasma.cache.IResourceInfo#shallIndexCacheForProxy()
+     */
+    public String shallIndexCacheForProxy() {
+        // -set-cookie in response
+        // the set-cookie from the server does not indicate that the content is special
+        // thus we do not care about it here for indexing
+
+        // a picture cannot be indexed
+        String mimeType = this.getMimeType();
+        if (plasmaHTCache.isPicture(mimeType)) {
+            return "Media_Content_(Picture)";
+        }
+        if (!plasmaHTCache.isText(mimeType)) {
+            return "Media_Content_(not_text)";
+        }
+
+        // -if-modified-since in request
+        // if the page is fresh at the very moment we can index it
+        // NOTE: the date must come from the request (If-Modified-Since); comparing the
+        // modification date of the response with itself could never detect a stale copy
+        Date requestedSince = this.ifModifiedSince();
+        if ((requestedSince != null) && (this.responseHeader.containsKey(httpHeader.LAST_MODIFIED))) {
+            // parse date
+            Date d = this.responseHeader.lastModified();
+            if (d == null) {
+                d = new Date(serverDate.correctedUTCTime());
+            }
+            // finally, we shall treat the cache as stale if the modification time is after the if-.. time
+            if (d.after(requestedSince)) {
+                return "Stale_(Last-Modified>Modified-Since)";
+            }
+        }
+
+        // -pragma in cached response
+        if (this.responseHeader.containsKey(httpHeader.PRAGMA) &&
+            ((String) this.responseHeader.get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE")) {
+            return "Denied_(pragma_no_cache)";
+        }
+
+        // see for documentation also:
+        // http://www.web-caching.com/cacheability.html
+
+        // look for freshness information
+
+        // -expires in cached response
+        // the expires value gives us a very easy hint when the cache is stale
+        // sometimes, the expires date is set to the past to prevent that a page is cached
+        // we use that information to see if we should index it
+        final Date expires = this.responseHeader.expires();
+        if (expires != null && expires.before(new Date(serverDate.correctedUTCTime()))) {
+            return "Stale_(Expired)";
+        }
+
+        // -lastModified in cached response
+        // this information is too weak to use it to prevent indexing
+        // even if we can apply a TTL heuristic for cache usage
+
+        // -cache-control in cached response
+        // the cache-control has many value options.
+        String cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
+        if (cacheControl != null) {
+            cacheControl = cacheControl.trim().toUpperCase();
+            /* we have the following cases for cache-control:
+               "public" -- can be indexed
+               "private", "no-cache", "no-store" -- cannot be indexed
+               "max-age=<delta-seconds>" -- stale/fresh dependent on date
+             */
+            if (cacheControl.startsWith("PRIVATE") ||
+                cacheControl.startsWith("NO-CACHE") ||
+                cacheControl.startsWith("NO-STORE")) {
+                // easy case
+                return "Stale_(denied_by_cache-control=" + cacheControl + ")";
+            } else if (cacheControl.startsWith("MAX-AGE=")) {
+                // we need also the load date
+                final Date date = this.responseHeader.date();
+                if (date == null) {
+                    return "Stale_(no_date_given_in_response)";
+                }
+                try {
+                    final long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                    if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
+                        return "Stale_(expired_by_cache-control)";
+                    }
+                } catch (Exception e) {
+                    return "Error_(" + e.getMessage() + ")";
+                }
+            }
+        }
+        return null;
+    }
+
+    /** Returns null when none of the checks below objects to storing the response, otherwise a short reason string. */
+    public String shallStoreCacheForProxy() {
+        if (this.requestHeader != null) {
+            // -authorization cases in request
+            // authorization makes pages very individual, and therefore we cannot use the
+            // content in the cache
+            if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) { return "personalized"; }
+            // -ranges in request
+            // we do not cache partial content
+            if (this.requestHeader.containsKey(httpHeader.RANGE)) { return "partial"; }
+        }
+        
+        if (this.responseHeader != null) {
+            // -content-range in response
+            // we do not cache partial content
+            if (this.responseHeader.containsKey(httpHeader.CONTENT_RANGE)) { return "partial"; }
+
+            // -if-modified-since in request
+            // we do not care about if-modified-since, because this case only occurs if the
+            // cache file does not exist, and we need as much info as possible for the indexing
+
+            // -cookies in request
+            // we do not care about cookies, because that would prevent loading more pages
+            // from one domain once a request resulted in a client-side stored cookie
+
+            // -set-cookie in response
+            // we do not care about cookies in responses, because that info comes along
+            // any/many pages from a server and does not express the validity of the page
+            // in modes of life-time/expiration or individuality
+
+            // -pragma in response
+            // if we have a pragma no-cache, we don't cache. usually if this is wanted from
+            // the server, it makes sense
+            String cacheControl = (String) this.responseHeader.get(httpHeader.PRAGMA);
+            if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return "controlled_no_cache"; }
+
+            // -expires in response
+            // we do not care about expires, because at the time this is called the data is
+            // obviously valid and that header info is used in the indexing later on
+
+            // -cache-control in response
+            // only a leading max-age directive is evaluated here
+            cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
+            if (cacheControl != null) {
+                cacheControl = cacheControl.trim().toUpperCase();
+                if (cacheControl.startsWith("MAX-AGE=")) {
+                    // we need also the load date
+                    Date date = this.responseHeader.date();
+                    if (date == null) return "stale_no_date_given_in_response";
+                    try {
+                        long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                        if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
+                            return "stale_expired";
+                        }
+                    } catch (Exception e) {
+                        return "stale_error_" + e.getMessage() + ")";
+                    }
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Decides if the cached copy of the resource may be delivered by the proxy.
+     * @return false if the request or the cached response demand a fresh copy, true otherwise
+     */
+    public boolean shallUseCacheForProxy() {
+        String cacheControl;
+        if (this.requestHeader != null) {
+            // -authorization cases in request
+            if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) { return false; }
+
+            // -ranges in request
+            // we do not cache partial content
+            if (this.requestHeader.containsKey(httpHeader.RANGE)) { return false; }
+
+            // if the client requests a un-cached copy of the resource ...
+            cacheControl = (String) this.requestHeader.get(httpHeader.PRAGMA);
+            if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return false; }
+
+            cacheControl = (String) this.requestHeader.get(httpHeader.CACHE_CONTROL);
+            if (cacheControl != null) {
+                cacheControl = cacheControl.trim().toUpperCase();
+                if (cacheControl.startsWith("NO-CACHE") || cacheControl.startsWith("MAX-AGE=0")) { return false; }
+            }
+        }
+
+        // -if-modified-since in request (skipped if no request header is known, it may be null)
+        // The entity has to be transferred only if it has
+        // been modified since the date given by the If-Modified-Since header.
+        if (this.requestHeader != null && this.requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
+            // checking this makes only sense if the cached response contains
+            // a Last-Modified field. If the field does not exist, we go the safe way
+            if (!this.responseHeader.containsKey(httpHeader.LAST_MODIFIED)) { return false; }
+            // parse date
+            Date d1, d2;
+            d2 = this.responseHeader.lastModified(); if (d2 == null) { d2 = new Date(serverDate.correctedUTCTime()); }
+            d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) { d1 = new Date(serverDate.correctedUTCTime()); }
+            // finally, we shall treat the cache as stale if the modification time is after the if-.. time
+            if (d2.after(d1)) { return false; }
+        }
+
+        String mimeType = this.getMimeType();
+        if (!plasmaHTCache.isPicture(mimeType)) {
+            // -cookies in request (only checked if a request header is known)
+            // unfortunately, we should reload in case of a cookie
+            // but we think that pictures can still be considered as fresh
+            // -set-cookie in cached response
+            // this is a similar case as for COOKIE.
+            if ((this.requestHeader != null && this.requestHeader.containsKey(httpHeader.COOKIE)) ||
+                this.responseHeader.containsKey(httpHeader.SET_COOKIE) ||
+                this.responseHeader.containsKey(httpHeader.SET_COOKIE2)) {
+                return false; // too strong
+            }
+        }
+
+        // -pragma in cached response
+        // logically, we would not need to care about no-cache pragmas in cached response headers,
+        // because they cannot exist since they are not written to the cache.
+        // So this IF should always fail..
+        cacheControl = (String) this.responseHeader.get(httpHeader.PRAGMA); 
+        if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return false; }
+
+        // see for documentation also:
+        // http://www.web-caching.com/cacheability.html
+        // http://vancouver-webpages.com/CacheNow/
+
+        // look for freshness information
+        // if we don't have any freshness indication, we treat the file as stale.
+        // no handle for freshness control:
+
+        // -expires in cached response
+        // the expires value gives us a very easy hint when the cache is stale
+        Date expires = this.responseHeader.expires();
+        if (expires != null) {
+            if (expires.before(new Date(serverDate.correctedUTCTime()))) { return false; }
+        }
+        Date lastModified = this.responseHeader.lastModified();
+        cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
+        if (cacheControl == null && lastModified == null && expires == null) { return false; }
+
+        // -lastModified in cached response
+        // we can apply a TTL (Time To Live)  heuristic here. We call the time delta between the last read
+        // of the file and the last modified date as the age of the file. If we consider the file as
+        // middle-aged then, the maximum TTL would be cache-creation plus age.
+        // This would be a TTL factor of 100% we want no more than 10% TTL, so that a 10 month old cache
+        // file may only be treated as fresh for one more month, not more.
+        Date date = this.responseHeader.date();
+        if (lastModified != null) {
+            if (date == null) { date = new Date(serverDate.correctedUTCTime()); }
+            long age = date.getTime() - lastModified.getTime();
+            if (age < 0) { return false; }
+            // TTL (Time-To-Live) is age/10 = (date.getTime() - lastModified.getTime()) / 10
+            // the actual living-time is serverDate.correctedUTCTime() - date.getTime()
+            // therefore the cache is stale, if serverDate.correctedUTCTime() - date.getTime() > age/10
+            if (serverDate.correctedUTCTime() - date.getTime() > age / 10) { return false; }
+        }
+
+        // -cache-control in cached response
+        // the cache-control has many value options.
+        if (cacheControl != null) {
+            cacheControl = cacheControl.trim().toUpperCase();
+            if (cacheControl.startsWith("PRIVATE") ||
+                cacheControl.startsWith("NO-CACHE") ||
+                cacheControl.startsWith("NO-STORE")) {
+                // easy case
+                return false;
+            } else if (cacheControl.startsWith("MAX-AGE=")) {
+                // we need also the load date
+                if (date == null) { return false; }
+                try {
+                    final long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                    if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
+                        return false;
+                    }
+                } catch (Exception e) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    public boolean validResponseStatus(String responseStatus) { // accepts statuses starting with 200 or 203
+        if (responseStatus.startsWith("200")) return true;
+        return responseStatus.startsWith("203");
+    }
+
+    public Date ifModifiedSince() {
+        return (this.requestHeader == null) ? null : this.requestHeader.ifModifiedSince(); // no request header: no date
+    }
+
+    public boolean requestWithCookie() {
+        return (this.requestHeader == null) ? false : this.requestHeader.containsKey(httpHeader.COOKIE); // no request header: no cookie
+    }
+
+    public boolean requestProhibitsIndexing() {
+        // without a request header nothing can prohibit indexing
+        if (this.requestHeader == null) return false;
+        if (!this.requestHeader.containsKey(httpHeader.X_YACY_INDEX_CONTROL)) return false;
+        return ((String) this.requestHeader.get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX");
+    }
+    
+    public httpHeader getRequestHeader() {
+        return this.requestHeader; // null if built by the (URL, Map) constructor or if null was passed in
+    }
+    
+    public httpHeader getResponseHeader() {
+        return this.responseHeader; // set to a non-null value by both constructors
+    }
+}
diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java
index 5eaafd77b..dcddcdc1f 100644
--- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java
+++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java
@@ -64,6 +64,8 @@ import de.anomic.plasma.plasmaCrawlLoader;
 import de.anomic.plasma.plasmaHTCache;
 import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.plasma.cache.http.ResourceInfo;
 import de.anomic.plasma.crawler.AbstractCrawlWorker;
 import de.anomic.plasma.crawler.plasmaCrawlerPool;
 import de.anomic.plasma.urlPattern.plasmaURLPattern;
@@ -129,15 +131,15 @@ public final class CrawlWorker extends AbstractCrawlWorker {
         return load(DEFAULT_CRAWLING_RETRY_COUNT);
     }    
 
-    protected plasmaHTCache.Entry createCacheEntry(Date requestDate, httpHeader requestHeader, httpc.response response) {
+    protected plasmaHTCache.Entry createCacheEntry(URL requestUrl, Date requestDate, httpHeader requestHeader, httpc.response response) {
+        IResourceInfo resourceInfo = new ResourceInfo(requestUrl,requestHeader,response.responseHeader);
         return this.cacheManager.newEntry(
                 requestDate, 
                 this.depth, 
                 this.url, 
-                this.name, 
-                requestHeader, 
-                response.status, 
-                response.responseHeader, 
+                this.name,  
+                response.status,
+                resourceInfo, 
                 this.initiator, 
                 this.profile
         );
@@ -197,7 +199,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
                 // the transfer is ok
                 
                 // create a new cache entry
-                htCache = createCacheEntry(requestDate, requestHeader, res); 
+                htCache = createCacheEntry(this.url,requestDate, requestHeader, res); 
                 
                 // aborting download if content is to long ...
                 if (htCache.cacheFile().getAbsolutePath().length() > serverSystem.maxPathLength) {
diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java
index 0b444dc50..103962769 100644
--- a/source/de/anomic/plasma/plasmaCrawlStacker.java
+++ b/source/de/anomic/plasma/plasmaCrawlStacker.java
@@ -295,7 +295,7 @@ public final class plasmaCrawlStacker {
         }
         
         // check if ip is local ip address
-        checkInterruption();        
+        checkInterruption();        // TODO: this is protocol specific
         InetAddress hostAddress = httpc.dnsResolve(nexturl.getHost());
         if (hostAddress == null) {
             // if a http proxy is configured name resolution may not work
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index c0b40bca4..99b3bf474 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -54,14 +54,12 @@
 package de.anomic.plasma;
 
 import de.anomic.http.httpc;
-import de.anomic.http.httpHeader;
 import de.anomic.index.indexEntryAttribute;
 import de.anomic.index.indexURL;
 import de.anomic.kelondro.kelondroDyn;
 import de.anomic.kelondro.kelondroMap;
 import de.anomic.kelondro.kelondroMScoreCluster;
 import de.anomic.server.logging.serverLog;
-import de.anomic.server.serverDate;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverInstantThread;
 import de.anomic.server.serverSystem;
@@ -73,6 +71,9 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
 import de.anomic.net.URL;
+import de.anomic.plasma.cache.IResourceInfo;
+import de.anomic.plasma.cache.ResourceInfoFactory;
+
 import java.util.Date;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -87,7 +88,7 @@ public final class plasmaHTCache {
     private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
     public  static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
 
-    private kelondroMap responseHeaderDB = null;
+    kelondroMap responseHeaderDB = null;
     private final LinkedList cacheStack;
     private final TreeMap cacheAge; // a <date+hash, cache-path> - relation
     public long curCacheSize;
@@ -96,11 +97,16 @@ public final class plasmaHTCache {
     public final serverLog log;
     public static final HashSet filesInUse = new HashSet(); // can we delete this file
 
+    private ResourceInfoFactory objFactory;
+    
     public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb, long preloadTime) {
         // this.switchboard = switchboard;
 
         this.log = new serverLog("HTCACHE");
         this.cachePath = htCachePath;
+        
+        // create the object factory
+        this.objFactory = new ResourceInfoFactory();
 
         // reset old HTCache ?
         String[] list = this.cachePath.list();
@@ -229,10 +235,6 @@ public final class plasmaHTCache {
         }
     }
 
-    public void storeHeader(String urlHash, httpHeader responseHeader) throws IOException {
-        this.responseHeaderDB.set(urlHash, responseHeader);
-    }
-
     /**
      * This method changes the HTCache size.<br>
      * @param new cache size in bytes
@@ -249,7 +251,7 @@ public final class plasmaHTCache {
         return (this.curCacheSize >= this.maxCacheSize) ? 0 : this.maxCacheSize - this.curCacheSize;
     }
 
-    public boolean writeFile(URL url, byte[] array) {
+    public boolean writeResourceContent(URL url, byte[] array) {
         if (array == null) return false;
         File file = getCachePath(url);
         try {
@@ -445,10 +447,24 @@ public final class plasmaHTCache {
         return prefix + s.substring(0, p);
     }
 
-    public httpHeader getCachedResponse(String urlHash) throws IOException {
+    /**
+     * Returns an object containing metadata about a cached resource
+     * @param url the url of the resource
+     * @return an {@link IResourceInfo info object}  
+     * @throws Exception if the info object could not be created, e.g. if the protocol is not supported
+     */
+    public IResourceInfo loadResourceInfo(URL url) throws Exception {    
+        
+        // getting the URL hash
+        String urlHash = indexURL.urlHash(url.toNormalform());
+        
+        // loading data from database
         Map hdb = this.responseHeaderDB.get(urlHash);
         if (hdb == null) return null;
-        return new httpHeader(null, hdb);
+        
+        // generate the cached object
+        IResourceInfo cachedObj = this.objFactory.buildResourceInfoObj(url, hdb);
+        return cachedObj;
     }
 
     public boolean full() {
@@ -459,18 +475,17 @@ public final class plasmaHTCache {
         return (this.cacheStack.size() == 0);
     }
 
-    public static boolean isPicture(httpHeader response) {
-        Object ct = response.get(httpHeader.CONTENT_TYPE);
-        if (ct == null) return false;
-        return ((String)ct).toUpperCase().startsWith("IMAGE");
+    public static boolean isPicture(String mimeType) {
+        if (mimeType == null) return false;
+        return mimeType.toUpperCase().startsWith("IMAGE");
     }
 
-    public static boolean isText(httpHeader response) {
+    public static boolean isText(String mimeType) {
 //      Object ct = response.get(httpHeader.CONTENT_TYPE);
 //      if (ct == null) return false;
 //      String t = ((String)ct).toLowerCase();
 //      return ((t.startsWith("text")) || (t.equals("application/xhtml+xml")));
-        return plasmaParser.supportedMimeTypesContains(response.mime());
+        return plasmaParser.supportedMimeTypesContains(mimeType);
     }
 
     public static boolean noIndexingURL(String urlString) {
@@ -568,9 +583,8 @@ public final class plasmaHTCache {
         }
         if (port < 0) {
             return new File(this.cachePath, protocol + "/" + host + path);
-        } else {
-            return new File(this.cachePath, protocol + "/" + host + "!" + port + path);
         }
+        return new File(this.cachePath, protocol + "/" + host + "!" + port + path);
     }
 
     /**
@@ -663,7 +677,7 @@ public final class plasmaHTCache {
         return null;
     }
 
-    public byte[] loadResource(URL url) {
+    public byte[] loadResourceContent(URL url) {
         // load the url as resource from the cache
         File f = getCachePath(url);
         if (f.exists()) try {
@@ -690,12 +704,30 @@ public final class plasmaHTCache {
                 (ls.indexOf("memberlist.php?sid=") >= 0));
     }
 
-    public Entry newEntry(Date initDate, int depth, URL url, String name,
-                          httpHeader requestHeader,
-                          String responseStatus, httpHeader responseHeader,
-                          String initiator,
-                          plasmaCrawlProfile.entry profile) {
-        return new Entry(initDate, depth, url, name, requestHeader, responseStatus, responseHeader, initiator, profile);
+    public Entry newEntry(
+            Date initDate, 
+            int depth, 
+            URL url, 
+            String name,
+            //httpHeader requestHeader,
+            String responseStatus, 
+            //httpHeader responseHeader,
+            IResourceInfo docInfo,            
+            String initiator,
+            plasmaCrawlProfile.entry profile
+    ) {
+        return new Entry(
+                initDate, 
+                depth, 
+                url, 
+                name, 
+                //requestHeader, 
+                responseStatus, 
+                //responseHeader,
+                docInfo,
+                initiator, 
+                profile
+        );
     }
 
     public final class Entry {
@@ -703,9 +735,9 @@ public final class plasmaHTCache {
     // the class objects
     private Date                     initDate;       // the date when the request happened; will be used as a key
     private int                      depth;          // the depth of prefetching
-    private httpHeader               requestHeader;  // we carry also the header to prevent too many file system access
-    private String                   responseStatus;
-    private httpHeader               responseHeader; // we carry also the header to prevent too many file system access
+//    private httpHeader               requestHeader;  // we carry also the header to prevent too many file system access
+//    private httpHeader               responseHeader; // we carry also the header to prevent too many file system access
+    private String                   responseStatus;    
     private File                     cacheFile;      // the cache file
     private byte[]                   cacheArray;     // or the cache as byte-array
     private URL                      url;
@@ -718,6 +750,11 @@ public final class plasmaHTCache {
     private String                   language;
     private plasmaCrawlProfile.entry profile;
     private String                   initiator;
+    
+    /**
+     * protocol-specific information about the resource
+     */
+    private IResourceInfo              resInfo;
 
     protected Object clone() throws CloneNotSupportedException {
         return new Entry(
@@ -725,9 +762,10 @@ public final class plasmaHTCache {
                 this.depth,
                 this.url,
                 this.name,
-                this.requestHeader,
+                //this.requestHeader,
                 this.responseStatus,
-                this.responseHeader,
+                //this.responseHeader,
+                this.resInfo,
                 this.initiator,
                 this.profile
         );
@@ -737,15 +775,21 @@ public final class plasmaHTCache {
             int depth, 
             URL url, 
             String name,
-            httpHeader requestHeader,
-            String responseStatus, 
-            httpHeader responseHeader,
+            //httpHeader requestHeader,
+            String responseStatus,
+            //httpHeader responseHeader,
+            IResourceInfo resourceInfo,            
             String initiator,
             plasmaCrawlProfile.entry profile
     ) {
-
+        if (resourceInfo == null){
+            System.out.println("Content information object is null. " + url);
+            System.exit(0);            
+        }
+        this.resInfo = resourceInfo;
+        
+        
         // normalize url
-//      serverLog.logFine("PLASMA", "Entry: URL=" + url.toString());
         this.nomalizedURLString = url.toNormalform();
 
         try {
@@ -761,28 +805,17 @@ public final class plasmaHTCache {
        // assigned:
         this.initDate       = initDate;
         this.depth          = depth;
-        this.requestHeader  = requestHeader;
+        //this.requestHeader  = requestHeader;
         this.responseStatus = responseStatus;
-        this.responseHeader = responseHeader;
+        //this.responseHeader = responseHeader;
         this.profile        = profile;
         this.initiator      = (initiator == null) ? null : ((initiator.length() == 0) ? null : initiator);
 
-        // calculated:
-        if (responseHeader == null) {
-           try {
-               throw new RuntimeException("RESPONSE HEADER = NULL");
-           } catch (Exception e) {
-               System.out.println("RESPONSE HEADER = NULL in " + url);
-               e.printStackTrace();
-               System.exit(0);
-           }
-
-            this.lastModified = new Date(serverDate.correctedUTCTime());
-        } else {
-            this.lastModified = responseHeader.lastModified();
-            if (this.lastModified == null) this.lastModified = new Date(serverDate.correctedUTCTime()); // does not exist in header
-        }
-        this.doctype = indexEntryAttribute.docType(responseHeader.mime());
+        // getting the last modified date
+        this.lastModified = resourceInfo.getModificationDate();
+        
+        // getting the doctype
+        this.doctype = indexEntryAttribute.docType(resourceInfo.getMimeType());
         if (this.doctype == indexEntryAttribute.DT_UNKNOWN) this.doctype = indexEntryAttribute.docType(url);
         this.language = indexEntryAttribute.language(url);
 
@@ -822,12 +855,7 @@ public final class plasmaHTCache {
     }
     
     public URL referrerURL() {
-        if (this.requestHeader == null) return null;
-        try {
-            return new URL((String) this.requestHeader.get(httpHeader.REFERER, ""));
-        } catch (Exception e) {
-            return null;
-        }
+        return (this.resInfo==null)?null:this.resInfo.getRefererUrl();
     }
 
     public File cacheFile() {
@@ -846,27 +874,36 @@ public final class plasmaHTCache {
 //        return this.requestHeader;
 //    }
     
-    public httpHeader responseHeader() {
-        return this.responseHeader;
+//    public httpHeader responseHeader() {
+//        return this.responseHeader;        
+//    }
+    
+    public IResourceInfo getDocumentInfo() {
+        return this.resInfo;
     }
     
+    public boolean writeResourceInfo() throws IOException {
+        assert(this.nomalizedURLHash != null) : "URL Hash is null";
+        if (this.resInfo == null) return false;
+        
+        plasmaHTCache.this.responseHeaderDB.set(this.nomalizedURLHash, this.resInfo.getMap());
+        return true;
+    }    
+    
     public String getMimeType() {
-        return (this.responseHeader == null) ? null : this.responseHeader.mime();
+        return (this.resInfo == null) ? null : this.resInfo.getMimeType();
     }
     
     public Date ifModifiedSince() {
-        return (this.requestHeader == null) ? null : this.requestHeader.ifModifiedSince();
+        return (this.resInfo == null) ? null : this.resInfo.ifModifiedSince();
     }
     
     public boolean requestWithCookie() {
-        return (this.requestHeader == null) ? false : this.requestHeader.containsKey(httpHeader.COOKIE);
+        return (this.resInfo == null) ? false : this.resInfo.requestWithCookie();
     }
     
     public boolean requestProhibitsIndexing() {
-        return (this.requestHeader == null) 
-        ? false 
-        : this.requestHeader.containsKey(httpHeader.X_YACY_INDEX_CONTROL) &&
-          ((String)this.requestHeader.get(httpHeader.X_YACY_INDEX_CONTROL)).toUpperCase().equals("NO-INDEX");
+        return (this.resInfo == null) ? false : this.resInfo.requestProhibitsIndexing();
     }
     
     /*
@@ -878,9 +915,10 @@ public final class plasmaHTCache {
     // the following three methods for cache read/write granting shall be as loose as possible
     // but also as strict as necessary to enable caching of most items
 
+    /**
+     * @return <code>null</code> if the answer is "yes, store it"; otherwise a String naming the reason for the refusal
+     */
     public String shallStoreCacheForProxy() {
-        // returns NULL if the answer is TRUE
-        // in case of FALSE, the reason as String is returned
 
         // check profile (disabled: we will check this in the plasmaSwitchboard)
         //if (!this.profile.storeHTCache()) { return "storage_not_wanted"; }
@@ -889,8 +927,11 @@ public final class plasmaHTCache {
         // if the storage was requested by prefetching, the request map is null
 
         // check status code
-        if (!(this.responseStatus.startsWith("200") ||
-              this.responseStatus.startsWith("203"))) { return "bad_status_" + this.responseStatus.substring(0,3); }
+        if ((this.resInfo != null) && (!this.resInfo.validResponseStatus(this.responseStatus))) {
+            return "bad_status_" + this.responseStatus.substring(0,3);
+        }        
+//        if (!(this.responseStatus.startsWith("200") ||
+//              this.responseStatus.startsWith("203"))) { return "bad_status_" + this.responseStatus.substring(0,3); }
 
         // check storage location
         // sometimes a file name is equal to a path name in the same directory;
@@ -905,62 +946,10 @@ public final class plasmaHTCache {
         if (isPOST(this.nomalizedURLString) && !this.profile.crawlingQ()) { return "dynamic_post"; }
         if (isCGI(this.nomalizedURLString)) { return "dynamic_cgi"; }
 
-        if (this.requestHeader != null) {
-            // -authorization cases in request
-            // authorization makes pages very individual, and therefore we cannot use the
-            // content in the cache
-            if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) { return "personalized"; }
-            // -ranges in request and response
-            // we do not cache partial content
-            if (this.requestHeader.containsKey(httpHeader.RANGE)) { return "partial"; }
-        }
-        // -ranges in request and response
-        // we do not cache partial content
-        if (this.responseHeader != null && this.responseHeader.containsKey(httpHeader.CONTENT_RANGE)) { return "partial"; }
-
-        // -if-modified-since in request
-        // we do not care about if-modified-since, because this case only occurres if the
-        // cache file does not exist, and we need as much info as possible for the indexing
-
-        // -cookies in request
-        // we do not care about cookies, because that would prevent loading more pages
-        // from one domain once a request resulted in a client-side stored cookie
-
-        // -set-cookie in response
-        // we do not care about cookies in responses, because that info comes along
-        // any/many pages from a server and does not express the validity of the page
-        // in modes of life-time/expiration or individuality
-
-        // -pragma in response
-        // if we have a pragma non-cache, we don't cache. usually if this is wanted from
-        // the server, it makes sense
-        String cacheControl = (String) this.responseHeader.get(httpHeader.PRAGMA);
-        if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return "controlled_no_cache"; }
-
-        // -expires in response
-        // we do not care about expires, because at the time this is called the data is
-        // obvious valid and that header info is used in the indexing later on
-
-        // -cache-control in response
-        // the cache-control has many value options.
-        cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
-        if (cacheControl != null) {
-            cacheControl = cacheControl.trim().toUpperCase();
-            if (cacheControl.startsWith("MAX-AGE=")) {
-                // we need also the load date
-                Date date = this.responseHeader.date();
-                if (date == null) return "stale_no_date_given_in_response";
-                try {
-                    long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
-                    if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
-                        //System.out.println("***not indexed because cache-control");
-                        return "stale_expired";
-                    }
-                } catch (Exception e) {
-                    return "stale_error_" + e.getMessage() + ")";
-                }
-            }
+        if (this.resInfo != null) {
+            return this.resInfo.shallStoreCacheForProxy();
         }
+        
         return null;
     }
 
@@ -971,146 +960,17 @@ public final class plasmaHTCache {
     public boolean shallUseCacheForProxy() {
 //      System.out.println("SHALL READ CACHE: requestHeader = " + requestHeader.toString() + ", responseHeader = " + responseHeader.toString());
 
-        String cacheControl;
-        if (this.requestHeader != null) {
-            // -authorization cases in request
-            if (this.requestHeader.containsKey(httpHeader.AUTHORIZATION)) { return false; }
-
-            // -ranges in request
-            // we do not cache partial content
-            if (this.requestHeader.containsKey(httpHeader.RANGE)) { return false; }
-
-            // if the client requests a un-cached copy of the resource ...
-            cacheControl = (String) this.requestHeader.get(httpHeader.PRAGMA);
-            if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return false; }
-
-            cacheControl = (String) this.requestHeader.get(httpHeader.CACHE_CONTROL);
-            if (cacheControl != null) {
-                cacheControl = cacheControl.trim().toUpperCase();
-                if (cacheControl.startsWith("NO-CACHE") || cacheControl.startsWith("MAX-AGE=0")) { return false; }
-            }
-        }
-
         // -CGI access in request
         // CGI access makes the page very individual, and therefore not usable in caches
         if (isPOST(this.nomalizedURLString)) { return false; }
         if (isCGI(this.nomalizedURLString)) { return false; }
-
-        // -if-modified-since in request
-        // The entity has to be transferred only if it has
-        // been modified since the date given by the If-Modified-Since header.
-        if (this.requestHeader.containsKey(httpHeader.IF_MODIFIED_SINCE)) {
-            // checking this makes only sense if the cached response contains
-            // a Last-Modified field. If the field does not exist, we go the safe way
-            if (!this.responseHeader.containsKey(httpHeader.LAST_MODIFIED)) { return false; }
-            // parse date
-            Date d1, d2;
-            d2 = this.responseHeader.lastModified(); if (d2 == null) { d2 = new Date(serverDate.correctedUTCTime()); }
-            d1 = this.requestHeader.ifModifiedSince(); if (d1 == null) { d1 = new Date(serverDate.correctedUTCTime()); }
-            // finally, we shall treat the cache as stale if the modification time is after the if-.. time
-            if (d2.after(d1)) { return false; }
-        }
-
-        if (!isPicture(this.responseHeader)) {
-            // -cookies in request
-            // unfortunately, we should reload in case of a cookie
-            // but we think that pictures can still be considered as fresh
-            // -set-cookie in cached response
-            // this is a similar case as for COOKIE.
-            if (this.requestHeader.containsKey(httpHeader.COOKIE) ||
-                this.responseHeader.containsKey(httpHeader.SET_COOKIE) ||
-                this.responseHeader.containsKey(httpHeader.SET_COOKIE2)) {
-                return false; // too strong
-            }
-        }
-
-        // -pragma in cached response
-        // logically, we would not need to care about no-cache pragmas in cached response headers,
-        // because they cannot exist since they are not written to the cache.
-        // So this IF should always fail..
-        cacheControl = (String) this.responseHeader.get(httpHeader.PRAGMA); 
-        if (cacheControl != null && cacheControl.trim().toUpperCase().equals("NO-CACHE")) { return false; }
-
-        // see for documentation also:
-        // http://www.web-caching.com/cacheability.html
-        // http://vancouver-webpages.com/CacheNow/
-
-        // look for freshnes information
-        // if we don't have any freshnes indication, we treat the file as stale.
-        // no handle for freshness control:
-
-        // -expires in cached response
-        // the expires value gives us a very easy hint when the cache is stale
-        Date expires = this.responseHeader.expires();
-        if (expires != null) {
-//          System.out.println("EXPIRES-TEST: expires=" + expires + ", NOW=" + serverDate.correctedGMTDate() + ", url=" + url);
-            if (expires.before(new Date(serverDate.correctedUTCTime()))) { return false; }
-        }
-        Date lastModified = this.responseHeader.lastModified();
-        cacheControl = (String) this.responseHeader.get(httpHeader.CACHE_CONTROL);
-        if (cacheControl == null && lastModified == null && expires == null) { return false; }
-
-        // -lastModified in cached response
-        // we can apply a TTL (Time To Live)  heuristic here. We call the time delta between the last read
-        // of the file and the last modified date as the age of the file. If we consider the file as
-        // middel-aged then, the maximum TTL would be cache-creation plus age.
-        // This would be a TTL factor of 100% we want no more than 10% TTL, so that a 10 month old cache
-        // file may only be treated as fresh for one more month, not more.
-        Date date = this.responseHeader.date();
-        if (lastModified != null) {
-            if (date == null) { date = new Date(serverDate.correctedUTCTime()); }
-            long age = date.getTime() - lastModified.getTime();
-            if (age < 0) { return false; }
-            // TTL (Time-To-Live) is age/10 = (d2.getTime() - d1.getTime()) / 10
-            // the actual living-time is serverDate.correctedGMTDate().getTime() - d2.getTime()
-            // therefore the cache is stale, if serverDate.correctedGMTDate().getTime() - d2.getTime() > age/10
-            if (serverDate.correctedUTCTime() - date.getTime() > age / 10) { return false; }
-        }
-
-        // -cache-control in cached response
-        // the cache-control has many value options.
-        if (cacheControl != null) {
-            cacheControl = cacheControl.trim().toUpperCase();
-            if (cacheControl.startsWith("PRIVATE") ||
-                cacheControl.startsWith("NO-CACHE") ||
-                cacheControl.startsWith("NO-STORE")) {
-                // easy case
-                return false;
-//          } else if (cacheControl.startsWith("PUBLIC")) {
-//              // ok, do nothing
-            } else if (cacheControl.startsWith("MAX-AGE=")) {
-                // we need also the load date
-                if (date == null) { return false; }
-                try {
-                    final long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
-                    if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
-                        return false;
-                    }
-                } catch (Exception e) {
-                    return false;
-                }
-            }
+        
+        if (this.resInfo != null) {
+            return this.resInfo.shallUseCacheForProxy();
         }
+        
         return true;
     }
 
     } // class Entry
-
-    /*
-    public static void main(String[] args) {
-        //String[] s = TimeZone.getAvailableIDs();
-        //for (int i = 0; i < s.length; i++) System.out.println("ZONE=" + s[i]);
-        Calendar c = GregorianCalendar.getInstance();
-        int zoneOffset = c.get(Calendar.ZONE_OFFSET)/(60*60*1000);
-        int DSTOffset = c.get(Calendar.DST_OFFSET)/(60*60*1000);
-        System.out.println("This Offset = " + (zoneOffset + DSTOffset));
-        for (int i = 0; i < 12; i++) {
-            c = new GregorianCalendar(TimeZone.getTimeZone("Etc/GMT-" + i));
-            //c.setTimeZone(TimeZone.getTimeZone("Etc/GMT+0"));
-            System.out.println("Zone offset: "+
-                     c.get(Calendar.ZONE_OFFSET)/(60*60*1000));
-            System.out.println(c.get(GregorianCalendar.HOUR) + ", " + c.getTime() + ", " + c.getTimeInMillis());
-        }
-    }
-     **/
 }
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 676f3e047..f6c0fe2b3 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -44,6 +44,7 @@ package de.anomic.plasma;
 
 import java.io.IOException;
 import de.anomic.net.URL;
+import de.anomic.plasma.cache.IResourceInfo;
 import de.anomic.plasma.crawler.http.CrawlWorker;
 
 import java.util.Enumeration;
@@ -167,15 +168,15 @@ public class plasmaSnippetCache {
         
         // if the snippet is not in the cache, we can try to get it from the htcache
         byte[] resource = null;
-        httpHeader header = null;
+        IResourceInfo docInfo = null;
         try {
-            resource = cacheManager.loadResource(url);
+            resource = this.cacheManager.loadResourceContent(url);
             if ((fetchOnline) && (resource == null)) {
                 plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000);
                 if (entry != null) {
-                    header = entry.responseHeader();
+                    docInfo = entry.getDocumentInfo();
                 }
-                resource = cacheManager.loadResource(url);
+                resource = this.cacheManager.loadResourceContent(url);
                 source = SOURCE_WEB;
             }
         } catch (IOException e) {
@@ -185,7 +186,7 @@ public class plasmaSnippetCache {
             //System.out.println("cannot load document for URL " + url);
             return new result(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL");
         }
-        plasmaParserDocument document = parseDocument(url, resource, header);
+        plasmaParserDocument document = parseDocument(url, resource, docInfo);
         
         if (document == null) return new result(null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
         //System.out.println("loaded document for URL " + url);
@@ -360,18 +361,18 @@ public class plasmaSnippetCache {
         return parseDocument(url, resource, null);
     }
     
-    public plasmaParserDocument parseDocument(URL url, byte[] resource, httpHeader header) {
+    public plasmaParserDocument parseDocument(URL url, byte[] resource, IResourceInfo docInfo) {
         try {
             if (resource == null) return null;
 
             // try to get the header from the htcache directory
-            if (header == null) {
+            if (docInfo == null) {
                 try {
-                    header = this.cacheManager.getCachedResponse(indexURL.urlHash(url));
-                } catch (IOException e) {}
+                    docInfo = this.cacheManager.loadResourceInfo(url);
+                } catch (Exception e) {}
             }
 
-            if (header == null) {
+            if (docInfo == null) {
                 String filename = this.cacheManager.getCachePath(url).getName();
                 int p = filename.lastIndexOf('.');
                 if (    // if no extension is available
@@ -394,8 +395,8 @@ public class plasmaSnippetCache {
                 }
                 return null;
             }
-            if (plasmaParser.supportedMimeTypesContains(header.mime())) {
-                return this.parser.parseSource(url, header.mime(), resource);
+            if (plasmaParser.supportedMimeTypesContains(docInfo.getMimeType())) {
+                return this.parser.parseSource(url, docInfo.getMimeType(), resource);
             }
             return null;
         } catch (InterruptedException e) {
@@ -407,10 +408,10 @@ public class plasmaSnippetCache {
     public byte[] getResource(URL url, boolean fetchOnline, int socketTimeout) {
         // load the url as resource from the web
         try {
-            byte[] resource = cacheManager.loadResource(url);
+            byte[] resource = cacheManager.loadResourceContent(url);
             if ((fetchOnline) && (resource == null)) {
                 loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout);
-                resource = cacheManager.loadResource(url);
+                resource = cacheManager.loadResourceContent(url);
             }
             return resource;
         } catch (IOException e) {
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index fea42ed2d..49f3e15f9 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -829,7 +829,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         /* =========================================================================
          * LOCAL IP ADDRESS CHECK
          * 
-         * check if ip is local ip address
+         * check if ip is local ip address // TODO: remove this protocol-specific code here
          * ========================================================================= */
         InetAddress hostAddress = httpc.dnsResolve(entry.url().getHost());
         if (hostAddress == null) {
@@ -856,9 +856,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                 (entry.profile().storeHTCache()) ||
                 (doIndexing && isSupportedContent)
         ) {
-            // store response header
-            if (entry.responseHeader() != null) {
-                this.cacheManager.storeHeader(entry.urlHash(), entry.responseHeader());
+            // store the resource info (response header) of the entry
+            if (entry.writeResourceInfo()) {
                 this.log.logInfo("WROTE HEADER for " + entry.cacheFile());
             }        
             
@@ -868,7 +867,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             } else {
                 String error = entry.shallStoreCacheForProxy();
                 if (error == null) {
-                    this.cacheManager.writeFile(entry.url(), entry.cacheArray());
+                    this.cacheManager.writeResourceContent(entry.url(), entry.cacheArray());
                     this.log.logFine("WROTE FILE (" + entry.cacheArray().length + " bytes) for " + entry.cacheFile());
                 } else {
                     this.log.logFine("WRITE OF FILE " + entry.cacheFile() + " FORBIDDEN: " + error);
diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
index eaa0e5c9c..eda6f0e90 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java
@@ -44,28 +44,27 @@
 
 package de.anomic.plasma;
 
-import de.anomic.http.httpHeader;
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.Date;
+
 import de.anomic.index.indexURL;
 import de.anomic.kelondro.kelondroBase64Order;
 import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroStack;
 import de.anomic.kelondro.kelondroRow;
+import de.anomic.kelondro.kelondroStack;
+import de.anomic.net.URL;
+import de.anomic.plasma.cache.IResourceInfo;
 import de.anomic.server.logging.serverLog;
-import de.anomic.server.serverDate;
 import de.anomic.yacy.yacySeedDB;
 
-import java.io.File;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import de.anomic.net.URL;
-import java.util.ArrayList;
-import java.util.Date;
-
 public class plasmaSwitchboardQueue {
 
     private kelondroStack sbQueueStack;
     private plasmaCrawlProfile profiles;
-    private plasmaHTCache htCache;
+    plasmaHTCache htCache;
     private plasmaCrawlLURL lurls;
     private File sbQueueStackPath;
     
@@ -191,7 +190,7 @@ public class plasmaSwitchboardQueue {
 
         // computed values
         private plasmaCrawlProfile.entry profileEntry;
-        private httpHeader responseHeader;
+        private IResourceInfo contentInfo;
         private URL referrerURL;
 
         public Entry(URL url, String referrer, Date ifModifiedSince, boolean requestWithCookie,
@@ -206,7 +205,7 @@ public class plasmaSwitchboardQueue {
             this.anchorName = (anchorName==null)?"":anchorName.trim();
             
             this.profileEntry = null;
-            this.responseHeader = null;
+            this.contentInfo = null;
             this.referrerURL = null;
         }
 
@@ -227,7 +226,7 @@ public class plasmaSwitchboardQueue {
             this.anchorName = row.getColString(7, "UTF-8");
 
             this.profileEntry = null;
-            this.responseHeader = null;
+            this.contentInfo = null;
             this.referrerURL = null;
         }
 
@@ -248,7 +247,7 @@ public class plasmaSwitchboardQueue {
             this.anchorName = (row[7] == null) ? null : (new String(row[7], "UTF-8")).trim();
 
             this.profileEntry = null;
-            this.responseHeader = null;
+            this.contentInfo = null;
             this.referrerURL = null;
         }
 
@@ -306,32 +305,24 @@ public class plasmaSwitchboardQueue {
             return profileEntry;
         }
 
-        private httpHeader responseHeader() {
-            if (responseHeader == null) try {
-                responseHeader = htCache.getCachedResponse(indexURL.urlHash(url));
-            } catch (IOException e) {
+        private IResourceInfo getCachedObjectInfo() { // loads the resource info on first use and keeps it in contentInfo
+            if (this.contentInfo == null) try {
+                this.contentInfo = plasmaSwitchboardQueue.this.htCache.loadResourceInfo(this.url);
+            } catch (Exception e) { // any load failure is logged and reported to the caller as null
-                serverLog.logSevere("PLASMA", "responseHeader: failed to get header", e);
+                serverLog.logSevere("PLASMA", "getCachedObjectInfo: failed to load resource info for " + this.url, e);
                 return null;
             }
-            return responseHeader;
+            return this.contentInfo;
         }
 
         public String getMimeType() {
-            httpHeader headers = this.responseHeader();
-            return (headers == null) ? null : headers.mime();
+            final IResourceInfo resourceInfo = this.getCachedObjectInfo();
+            return (resourceInfo != null) ? resourceInfo.getMimeType() : null; // null when no cached info is available
         }
         
         public Date getModificationDate() {
-            Date docDate = null;
-            
-            httpHeader headers = this.responseHeader();
-            if (headers != null) {
-                docDate = headers.lastModified();
-                if (docDate == null) docDate = headers.date();
-            }
-            if (docDate == null) docDate = new Date();   
-            
-            return docDate;
+            final Date docDate = (this.getCachedObjectInfo() == null) ? null : this.getCachedObjectInfo().getModificationDate();
+            return (docDate == null) ? new Date() : docDate; // as before the refactoring: fall back to "now", never return null
         }
         
         public URL referrerURL() {
@@ -360,6 +351,8 @@ public class plasmaSwitchboardQueue {
          * this method returns null if the answer is 'YES'!
          * if the answer is 'NO' (do not index), it returns a string with the reason
          * to reject the crawling demand in clear text
+         * 
+         * This function is used by plasmaSwitchboard#processResourceStack
          */
         public final String shallIndexCacheForProxy() {
             if (profile() == null) {
@@ -402,91 +395,8 @@ public class plasmaSwitchboardQueue {
                 return "Dynamic_(Requested_With_Cookie)";
             }
 
-            // -set-cookie in response
-            // the set-cookie from the server does not indicate that the content is special
-            // thus we do not care about it here for indexing
-            if (responseHeader() != null) {
-                // a picture cannot be indexed
-                if (plasmaHTCache.isPicture(responseHeader())) {
-                    return "Media_Content_(Picture)";
-                }
-                if (!plasmaHTCache.isText(responseHeader())) {
-                    return "Media_Content_(not_text)";
-                }
-
-                // -if-modified-since in request
-                // if the page is fresh at the very moment we can index it
-                if ((ifModifiedSince != null) && (responseHeader().containsKey(httpHeader.LAST_MODIFIED))) {
-                    // parse date
-                    Date d = responseHeader().lastModified();
-                    if (d == null) {
-                        d = new Date(serverDate.correctedUTCTime());
-                    }
-                    // finally, we shall treat the cache as stale if the modification time is after the if-.. time
-                    if (d.after(ifModifiedSince)) {
-                        //System.out.println("***not indexed because if-modified-since");
-                        return "Stale_(Last-Modified>Modified-Since)";
-                    }
-                }
-
-                // -pragma in cached response
-                if (responseHeader().containsKey(httpHeader.PRAGMA) &&
-                    ((String) responseHeader().get(httpHeader.PRAGMA)).toUpperCase().equals("NO-CACHE")) {
-                    return "Denied_(pragma_no_cache)";
-                }
-
-                // see for documentation also:
-                // http://www.web-caching.com/cacheability.html
-
-                // look for freshnes information
-
-                // -expires in cached response
-                // the expires value gives us a very easy hint when the cache is stale
-                // sometimes, the expires date is set to the past to prevent that a page is cached
-                // we use that information to see if we should index it
-                final Date expires = responseHeader().expires();
-                if (expires != null && expires.before(new Date(serverDate.correctedUTCTime()))) {
-                    return "Stale_(Expired)";
-                }
-
-                // -lastModified in cached response
-                // this information is too weak to use it to prevent indexing
-                // even if we can apply a TTL heuristic for cache usage
-
-                // -cache-control in cached response
-                // the cache-control has many value options.
-                String cacheControl = (String) responseHeader.get(httpHeader.CACHE_CONTROL);
-                if (cacheControl != null) {
-                    cacheControl = cacheControl.trim().toUpperCase();
-                    /* we have the following cases for cache-control:
-                       "public" -- can be indexed
-                       "private", "no-cache", "no-store" -- cannot be indexed
-                       "max-age=<delta-seconds>" -- stale/fresh dependent on date
-                     */
-                    if (cacheControl.startsWith("PRIVATE") ||
-                        cacheControl.startsWith("NO-CACHE") ||
-                        cacheControl.startsWith("NO-STORE")) {
-                        // easy case
-                        return "Stale_(denied_by_cache-control=" + cacheControl + ")";
-//                  } else if (cacheControl.startsWith("PUBLIC")) {
-//                      // ok, do nothing
-                    } else if (cacheControl.startsWith("MAX-AGE=")) {
-                        // we need also the load date
-                        final Date date = responseHeader().date();
-                        if (date == null) {
-                            return "Stale_(no_date_given_in_response)";
-                        }
-                        try {
-                            final long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
-                            if (serverDate.correctedUTCTime() - date.getTime() > ttl) {
-                                //System.out.println("***not indexed because cache-control");
-                                return "Stale_(expired_by_cache-control)";
-                            }
-                        } catch (Exception e) {
-                            return "Error_(" + e.getMessage() + ")";
-                        }
-                    }
-                }
+            if (getCachedObjectInfo() != null) {
+                return this.getCachedObjectInfo().shallIndexCacheForProxy();
             }
             return null;
         }
@@ -496,6 +406,8 @@ public class plasmaSwitchboardQueue {
          * this method returns null if the answer is 'YES'!
          * if the answer is 'NO' (do not index), it returns a string with the reason
          * to reject the crawling demand in clear text
+         * 
+         * This function is used by plasmaSwitchboard#processResourceStack
          */
         public final String shallIndexCacheForCrawler() {
             if (profile() == null) {
@@ -520,9 +432,9 @@ public class plasmaSwitchboardQueue {
             // we checked that in shallStoreCache
 
             // a picture cannot be indexed
-            if (responseHeader() != null) {
-                if (plasmaHTCache.isPicture(responseHeader())) { return "Media_Content_(Picture)"; }
-                if (!plasmaHTCache.isText(responseHeader())) { return "Media_Content_(not_text)"; }
+            if (getCachedObjectInfo() != null) {
+                String status = this.getCachedObjectInfo().shallIndexCacheForProxy();
+                if (status != null) return status;
             }
             if (plasmaHTCache.noIndexingURL(nURL)) { return "Media_Content_(forbidden)"; }