@@ -36,7 +36,6 @@ import de.anomic.http.client.Cache;
 import de.anomic.http.metadata.HeaderFramework;
 import de.anomic.http.metadata.RequestHeader;
 import de.anomic.http.metadata.ResponseContainer;
-import de.anomic.http.metadata.ResponseHeader;
 import de.anomic.search.Switchboard;
 import de.anomic.yacy.yacyURL;
 import de.anomic.yacy.logging.Log;
@@ -73,27 +72,7 @@ public final class HTTPLoader {
         // refreshing timeout value
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000);
     }
-
-    /**
-     * @param entry
-     * @param requestDate
-     * @param requestHeader
-     * @param responseHeader
-     * @param responseStatus Status-Code SPACE Reason-Phrase
-     * @return
-     */
-    protected Response createCacheEntry(final Request request, final Date requestDate, final RequestHeader requestHeader, final ResponseHeader responseHeader, final String responseStatus) {
-        Response metadata = new Response(
-                request,
-                requestHeader,
-                responseHeader,
-                responseStatus,
-                sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle())
-        );
-        Cache.storeMetadata(responseHeader, metadata);
-        return metadata;
-    }
 
     public Response load(final Request entry) throws IOException {
         long start = System.currentTimeMillis();
 
@@ -102,158 +81,143 @@ public final class HTTPLoader {
         return doc;
     }
 
-    private Response load(final Request entry, final int retryCount) throws IOException {
+    private Response load(final Request request, final int retryCount) throws IOException {
 
         if (retryCount < 0) {
-            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "redirection counter exceeded").store();
-            throw new IOException("Redirection counter exceeded for URL " + entry.url().toString() + ". Processing aborted.");
+            sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "redirection counter exceeded").store();
+            throw new IOException("Redirection counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 
-        final Date requestDate = new Date(); // remember the time...
-        final String host = entry.url().getHost();
-        final String path = entry.url().getFile();
-        int port = entry.url().getPort();
-        final boolean ssl = entry.url().getProtocol().equals("https");
+        final String host = request.url().getHost();
+        final String path = request.url().getFile();
+        int port = request.url().getPort();
+        final boolean ssl = request.url().getProtocol().equals("https");
         if (port < 0) port = (ssl) ? 443 : 80;
 
         // if not the right file type then reject file
-        String supportError = Parser.supportsExtension(entry.url());
+        String supportError = Parser.supportsExtension(request.url());
         if (supportError != null) {
-            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
+            sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, supportError);
             throw new IOException("REJECTED WRONG EXTENSION TYPE: " + supportError);
         }
 
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
-            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "url in blacklist").store();
-            throw new IOException("CRAWLER Rejecting URL '" + entry.url().toString() + "'. URL is in blacklist.");
+            sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "url in blacklist").store();
+            throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
         // take a file from the net
-        Response htCache = null;
+        Response response = null;
         final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE);
-        //try {
 
         // create a request header
         final RequestHeader requestHeader = new RequestHeader();
         requestHeader.put(HeaderFramework.USER_AGENT, crawlerUserAgent);
         yacyURL refererURL = null;
-        if (entry.referrerhash() != null) refererURL = sb.getURL(entry.referrerhash());
+        if (request.referrerhash() != null) refererURL = sb.getURL(request.referrerhash());
         if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
         requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
         requestHeader.put(HeaderFramework.ACCEPT_CHARSET, sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));
         requestHeader.put(HeaderFramework.ACCEPT_ENCODING, sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
 
         // HTTP-Client
         final Client client = new Client(socketTimeout, requestHeader);
 
         ResponseContainer res = null;
         try {
             // send request
-            res = client.GET(entry.url().toString(), maxFileSize);
+            res = client.GET(request.url().toString(), maxFileSize);
 
             // FIXME: 30*-handling (bottom) is never reached
             // we always get the final content because httpClient.followRedirects = true
 
             /*
             // check if the content length is allowed
             long contentLength = res.getResponseHeader().getContentLength();
             if (maxFileSize > 0 && contentLength > maxFileSize) {
                 sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "file size limit exceeded");
                 throw new IOException("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (HEAD)");
             }
             */
 
             if (res.getStatusCode() == 200 || res.getStatusCode() == 203) {
                 // the transfer is ok
 
                 // create a new cache entry
-                htCache = createCacheEntry(entry, requestDate, requestHeader, res.getResponseHeader(), res.getStatusLine());
+                response = new Response(
+                        request,
+                        requestHeader,
+                        res.getResponseHeader(),
+                        res.getStatusLine(),
+                        sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle())
+                );
+                Cache.storeMetadata(request.url(), res.getResponseHeader());
 
                 // request has been placed and result has been returned. work off response
-                //try {
                 // if the response has not the right file type then reject file
-                supportError = Parser.supports(entry.url(), res.getResponseHeader().mime());
+                supportError = Parser.supports(request.url(), res.getResponseHeader().mime());
                 if (supportError != null) {
-                    sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
+                    sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, supportError);
                     throw new IOException("REJECTED WRONG MIME TYPE: " + supportError);
                 }
 
                 // we write the new cache entry to file system directly
                 res.setAccountingName("CRAWLER");
                 final byte[] responseBody = res.getData();
                 long contentLength = responseBody.length;
 
                 // check length again in case it was not possible to get the length before loading
                 if (maxFileSize > 0 && contentLength > maxFileSize) {
-                    sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "file size limit exceeded");
-                    throw new IOException("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
+                    sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "file size limit exceeded");
+                    throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
                 }
 
-                htCache.setContent(responseBody);
-                return htCache;
-                /*
-                } catch (final SocketException e) {
-                    // this may happen if the client suddenly closes its connection
-                    // maybe the user has stopped loading
-                    // in that case, we are not responsible and just forget it
-                    // but we clean the cache also, since it may be only partial
-                    // and most possible corrupted
-                    this.log.logSevere("CRAWLER LOADER ERROR1: with URL=" + entry.url().toString() + ": " + e.toString());
-                    sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_CONNECTION_ERROR);
-                    htCache = null;
-                } */
+                response.setContent(responseBody);
+                return response;
             } else if (res.getStatusLine().startsWith("30")) {
                 if (res.getResponseHeader().containsKey(HeaderFramework.LOCATION)) {
                     // getting redirection URL
                     String redirectionUrlString = res.getResponseHeader().get(HeaderFramework.LOCATION);
                     redirectionUrlString = redirectionUrlString.trim();
 
                     if (redirectionUrlString.length() == 0) {
-                        sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "redirection header empy");
-                        throw new IOException("CRAWLER Redirection of URL=" + entry.url().toString() + " aborted. Location header is empty.");
+                        sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "redirection header empty");
+                        throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " aborted. Location header is empty.");
                     }
 
                     // normalizing URL
-                    final yacyURL redirectionUrl = yacyURL.newURL(entry.url(), redirectionUrlString);
+                    final yacyURL redirectionUrl = yacyURL.newURL(request.url(), redirectionUrlString);
 
                     // restart crawling with new url
-                    this.log.logInfo("CRAWLER Redirection detected ('" + res.getStatusLine() + "') for URL " + entry.url().toString());
+                    this.log.logInfo("CRAWLER Redirection detected ('" + res.getStatusLine() + "') for URL " + request.url().toString());
                     this.log.logInfo("CRAWLER ..Redirecting request to: " + redirectionUrl);
 
                     // if we are already doing a shutdown we don't need to retry crawling
                     if (Thread.currentThread().isInterrupted()) {
-                        sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "server shutdown");
-                        throw new IOException("CRAWLER Retry of URL=" + entry.url().toString() + " aborted because of server shutdown.");
+                        sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "server shutdown");
+                        throw new IOException("CRAWLER Retry of URL=" + request.url().toString() + " aborted because of server shutdown.");
                     }
 
                     // generating url hash
                     final String urlhash = redirectionUrl.hash();
 
                     // check if the url was already indexed
                     final String dbname = sb.urlExists(urlhash);
                     if (dbname != null) {
-                        sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "redirection to double content");
-                        throw new IOException("CRAWLER Redirection of URL=" + entry.url().toString() + " ignored. The url appears already in db " + dbname);
+                        sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "redirection to double content");
+                        throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " ignored. The url appears already in db " + dbname);
                    }
 
                     // retry crawling with new url
-                    entry.redirectURL(redirectionUrl);
-                    return load(entry, retryCount - 1);
+                    request.redirectURL(redirectionUrl);
+                    return load(request, retryCount - 1);
                 }
             } else {
                 // if the response has not the right response type then reject file
-                sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong http status code " + res.getStatusCode() + ")");
-                throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString());
+                sb.crawlQueues.errorURL.newEntry(request, sb.peers.mySeed().hash, new Date(), 1, "wrong http status code " + res.getStatusCode());
+                throw new IOException("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + request.url().toString());
             }
         } finally {
             if (res != null) {
                 // release connection
                 res.closeStream();
             }
         }
-        return htCache;
+        return response;
     }
 }