Different handling of error cases that occur when loading files via http or ftp:

methods now throw an exception instead of returning an error string

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5328 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent 538359a0ff
commit 674ad2d55b
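In short, callers of the loader methods move from a null/error-string check to try/catch. A minimal sketch of the new calling pattern, modeled on the ProtocolLoader.process() hunk further down (the identifiers loader, entry, sb and log are taken from the surrounding diff, not introduced here; this snippet is illustration only and not part of the commit):

    indexDocumentMetadata h;
    try {
        h = loader.load(entry, parserMode);                  // may now throw IOException
        entry.setStatus("loaded");
        final boolean stored = sb.htEntryStoreProcess(h);
        entry.setStatus("stored-" + (stored ? "ok" : "fail"));
    } catch (final IOException e) {
        entry.setStatus("error");                            // previously: load() returned null or an error string
        log.logWarning("problem loading " + entry.url().toString());
    }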

@ -35,6 +35,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
import de.anomic.ymage.ymageImageParser;
@ -79,7 +80,12 @@ public class ViewImage {
// getting the image as stream
Image scaled = iconcache.get(urlString);
if (scaled == null) {
final Object[] resource = plasmaSnippetCache.getResource(url, true, timeout, false, true);
Object[] resource = null;
try {
resource = plasmaSnippetCache.getResource(url, true, timeout, false, true);
} catch (IOException e) {
serverLog.logWarning("ViewImage", "cannot load: " + e.getMessage());
}
byte[] imgb = null;
if (resource == null) {
if (urlString.endsWith(".ico")) {

@ -465,7 +465,7 @@ public class CrawlQueues {
final boolean keepInMemory,
final boolean forText,
final boolean global
) {
) throws IOException {
final CrawlEntry centry = new CrawlEntry(
sb.webIndex.seedDB.mySeed().hash,

@ -380,7 +380,7 @@ public final class CrawlStacker extends Thread {
// check if the protocol is supported
final String urlProtocol = entry.url().getProtocol();
if (!sb.crawlQueues.isSupportedProtocol(urlProtocol)) {
reason = ErrorURL.DENIED_UNSUPPORTED_PROTOCOL;
reason = "unsupported protocol";
this.log.logSevere("Unsupported protocol in URL '" + entry.url().toString() + "'. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
@ -396,7 +396,7 @@ public final class CrawlStacker extends Thread {
// check blacklist
if (plasmaSwitchboard.urlBlacklist.isListed(indexReferenceBlacklist.BLACKLIST_CRAWLER, entry.url())) {
reason = ErrorURL.DENIED_URL_IN_BLACKLIST;
reason = "url in blacklist";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is in blacklist. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
@ -411,8 +411,7 @@ public final class CrawlStacker extends Thread {
// filter deny
if ((entry.depth() > 0) && (!(entry.url().toString().matches(profile.generalFilter())))) {
reason = ErrorURL.DENIED_URL_DOES_NOT_MATCH_FILTER;
reason = "url does not match general filter";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' does not match crawling filter '" + profile.generalFilter() + "'. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
@ -420,7 +419,7 @@ public final class CrawlStacker extends Thread {
// deny cgi
if (entry.url().isCGI()) {
reason = ErrorURL.DENIED_CGI_URL;
reason = "cgi url not allowed";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is CGI URL. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
@ -429,7 +428,7 @@ public final class CrawlStacker extends Thread {
// deny post properties
if (entry.url().isPOST() && !(profile.crawlingQ())) {
reason = ErrorURL.DENIED_POST_URL;
reason = "post url not allowed";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is post URL. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
@ -445,7 +444,7 @@ public final class CrawlStacker extends Thread {
// deny urls that do not match with the profile domain list
if (!(profile.grantedDomAppearance(entry.url().getHost()))) {
reason = ErrorURL.DENIED_NO_MATCH_WITH_DOMAIN_FILTER;
reason = "url does not match domain filter";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is not listed in granted domains. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
@ -453,7 +452,7 @@ public final class CrawlStacker extends Thread {
// deny urls that exceed allowed number of occurrences
if (!(profile.grantedDomCount(entry.url().getHost()))) {
reason = ErrorURL.DENIED_DOMAIN_COUNT_EXCEEDED;
reason = "domain counter exceeded";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' appeared too often, a maximum of " + profile.domMaxPages() + " is allowed. "+
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
@ -465,12 +464,12 @@ public final class CrawlStacker extends Thread {
final boolean recrawl = (oldEntry != null) && (profile.recrawlIfOlder() > oldEntry.loaddate().getTime());
// do double-check
if ((dbocc != null) && (!recrawl)) {
reason = ErrorURL.DOUBLE_REGISTERED + dbocc + ")";
reason = "double " + dbocc + ")";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
}
if ((oldEntry != null) && (!recrawl)) {
reason = ErrorURL.DOUBLE_REGISTERED + "LURL)";
reason = "double " + "LURL)";
if (this.log.isFine()) this.log.logFine("URL '" + entry.url().toString() + "' is double registered in 'LURL'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");
return reason;
}

@ -1,94 +0,0 @@
// plasmaCrawlEURL.java
// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 09.08.2004 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.crawler;
public class ErrorURL {
/* =======================================================================
* Failure reason constants
* ======================================================================= */
// invalid urls
public static final String DENIED_URL_NULL = "denied_(url_null)";
public static final String DENIED_MALFORMED_URL = "denied_(malformed_url)";
public static final String DENIED_UNSUPPORTED_PROTOCOL = "denied_(unsupported_protocol)";
public static final String DENIED_LOOPBACK_IP_ADDRESS = "denied_(loopback_ip_address)";
public static final String DENIED_CACHEFILE_PATH_TOO_LONG = "denied_(cachefile_path_too_long)";
public static final String DENIED_INVALID_CACHEFILE_PATH = "denied_(invalid_cachefile_path)";
// blacklisted/blocked urls
public static final String DENIED_URL_IN_BLACKLIST = "denied_(url_in_blacklist)";
public static final String DENIED_URL_DOES_NOT_MATCH_FILTER = "denied_(does_not_match_filter)";
public static final String DENIED_CGI_URL = "denied_(cgi_url)";
public static final String DENIED_POST_URL = "denied_(post_url)";
public static final String DENIED_NO_MATCH_WITH_DOMAIN_FILTER = "denied_(no_match_with_domain_filter)";
public static final String DENIED_DOMAIN_COUNT_EXCEEDED = "denied_(domain_count_exceeded)";
public static final String DENIED_ROBOTS_TXT = "denied_(robots.txt)";
// wrong content
public static final String DENIED_WRONG_MIMETYPE_OR_EXT = "denied_(wrong_mimetype_or_extension)";
public static final String DENIED_UNSUPPORTED_CHARSET = "denied_(unsupported_charset)";
public static final String DENIED_REDIRECTION_HEADER_EMPTY = "denied_(redirection_header_empty)";
public static final String DENIED_REDIRECTION_COUNTER_EXCEEDED = "denied_(redirection_counter_exceeded)";
public static final String DENIED_REDIRECTION_TO_DOUBLE_CONTENT = "denied_(redirection_to_double_content)";
public static final String DENIED_WRONG_HTTP_STATUSCODE = "denied_(wrong_http_status_code_";
public static final String DENIED_CONTENT_DECODING_ERROR = "denied_(content_decoding_error)";
public static final String DENIED_FILESIZE_LIMIT_EXCEEDED = "denied_(filesize_limit_exceeded)";
public static final String DENIED_FILESIZE_UNKNOWN = "denied_(filesize_unknown)";
// network errors
public static final String DENIED_UNKNOWN_HOST = "denied_(unknown_host)";
public static final String DENIED_NO_ROUTE_TO_HOST = "denied_(no_route_to_host)";
public static final String DENIED_NETWORK_IS_UNREACHABLE = "denied_(Network_is_unreachable)";
// connection errors
public static final String DENIED_CONNECTION_ERROR = "denied_(connection_error)";
public static final String DENIED_CONNECTION_BIND_EXCEPTION = "denied_(connection_bind_exception)";
public static final String DENIED_CONNECTION_TIMEOUT = "denied_(connection_timeout)";
public static final String DENIED_CONNECTION_REFUSED = "denied_(connection_refused)";
public static final String DENIED_SSL_UNTRUSTED_CERT = "denied_(No_trusted_ssl_certificate_found)";
// double registered errors
public static final String DOUBLE_REGISTERED = "double_(registered_in_";
// server errors
public static final String DENIED_OUT_OF_DISK_SPACE = "denied_(out_of_disk_space)";
public static final String DENIED_SERVER_SHUTDOWN = "denied_(server_shutdown)";
public static final String DENIED_SERVER_LOGIN_FAILED = "denied_(server_login_failed)";
public static final String DENIED_SERVER_TRASFER_MODE_PROBLEM = "denied_(server_transfermode_problem)";
public static final String DENIED_SERVER_DOWNLOAD_ERROR = "denied_(server_download_error)";
// Parser errors
public static final String DENIED_PARSER_ERROR = "denied_(parser_error)";
public static final String DENIED_DOCUMENT_ENCRYPTED = "denied_(document_encrypted)";
public static final String DENIED_NOT_PARSEABLE_NO_CONTENT = "denied_(not_parseabel_no_content)";
// indexing errors
public static final String DENIED_UNSPECIFIED_INDEXING_ERROR = "denied_(unspecified_indexing_error)";
public static final String DENIED_UNKNOWN_INDEXING_PROCESS_CASE = "denied_(unknown_indexing_process_case)";
}

@ -104,7 +104,7 @@ public class FTPLoader {
if (openConnection(ftpClient, entryUrl)) {
// ftp stuff
try {
//try {
// testing if the specified file is a directory
if (file.length() > 0) {
ftpClient.exec("cd \"" + path + "\"", false);
@ -133,9 +133,12 @@ public class FTPLoader {
(new PrintStream(berr)).print(e.getMessage());
}
}
/*
} finally {
closeConnection(ftpClient);
}
*/
closeConnection(ftpClient);
}
// pass the downloaded resource to the cache manager
@ -143,7 +146,7 @@ public class FTPLoader {
// some error logging
final String detail = (berr.size() > 0) ? "\n Errorlog: " + berr.toString() : "";
log.logWarning("Unable to download URL " + entry.url().toString() + detail);
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_SERVER_DOWNLOAD_ERROR);
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "server download" + detail);
}
return htCache;
@ -239,14 +242,13 @@ public class FTPLoader {
htCache.setCacheArray(b);
} else {
log.logInfo("REJECTED TOO BIG FILE with size " + size + " Bytes for URL " + entry.url().toString());
sb.crawlQueues.errorURL.newEntry(entry, this.sb.webIndex.seedDB.mySeed().hash, new Date(), 1,
ErrorURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
sb.crawlQueues.errorURL.newEntry(entry, this.sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "file size limit exceeded");
throw new Exception("file size exceeds limit");
}
} else {
// if the response has not the right file type then reject file
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + mimeType + " for URL " + entry.url().toString());
sb.crawlQueues.errorURL.newEntry(entry, this.sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_WRONG_MIMETYPE_OR_EXT);
sb.crawlQueues.errorURL.newEntry(entry, this.sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension");
throw new Exception("response has not the right file type -> rejected");
}
return htCache;

@ -26,10 +26,6 @@
package de.anomic.crawler;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.UnknownHostException;
import java.util.Date;
import de.anomic.http.HttpClient;
@ -37,14 +33,12 @@ import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
import de.anomic.http.httpdLimitExceededException;
import de.anomic.http.httpdProxyCacheEntry;
import de.anomic.index.indexDocumentMetadata;
import de.anomic.index.indexReferenceBlacklist;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
@ -105,16 +99,15 @@ public final class HTTPLoader {
return metadata;
}
public indexDocumentMetadata load(final CrawlEntry entry, final String parserMode) {
public indexDocumentMetadata load(final CrawlEntry entry, final String parserMode) throws IOException {
return load(entry, parserMode, DEFAULT_CRAWLING_RETRY_COUNT);
}
private indexDocumentMetadata load(final CrawlEntry entry, final String parserMode, final int retryCount) {
private indexDocumentMetadata load(final CrawlEntry entry, final String parserMode, final int retryCount) throws IOException {
if (retryCount < 0) {
this.log.logInfo("Redirection counter exceeded for URL " + entry.url().toString() + ". Processing aborted.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_REDIRECTION_COUNTER_EXCEEDED).store();
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "redirection counter exceeded").store();
throw new IOException("Redirection counter exceeded for URL " + entry.url().toString() + ". Processing aborted.");
}
final Date requestDate = new Date(); // remember the time...
@ -127,15 +120,14 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(indexReferenceBlacklist.BLACKLIST_CRAWLER, hostlow, path)) {
this.log.logInfo("CRAWLER Rejecting URL '" + entry.url().toString() + "'. URL is in blacklist.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_URL_IN_BLACKLIST).store();
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "url in blacklist").store();
throw new IOException("CRAWLER Rejecting URL '" + entry.url().toString() + "'. URL is in blacklist.");
}
// take a file from the net
indexDocumentMetadata htCache = null;
final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", DEFAULT_MAXFILESIZE);
try {
//try {
// create a request header
final httpRequestHeader requestHeader = new httpRequestHeader();
requestHeader.put(httpRequestHeader.USER_AGENT, crawlerUserAgent);
@ -150,7 +142,7 @@ public final class HTTPLoader {
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(socketTimeout, requestHeader);
JakartaCommonsHttpResponse res = null;
try {
//try {
// send request
res = client.GET(entry.url().toString());
@ -161,15 +153,14 @@ public final class HTTPLoader {
htCache = createCacheEntry(entry, requestDate, requestHeader, res.getResponseHeader(), res.getStatusLine());
// request has been placed and result has been returned. work off response
try {
//try {
if (plasmaParser.supportedContent(parserMode, entry.url(), res.getResponseHeader().mime())) {
// get the content length and check if the length is allowed
long contentLength = res.getResponseHeader().getContentLength();
if (maxFileSize >= 0 && contentLength > maxFileSize) {
this.log.logInfo("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "file size limit exceeded");
throw new IOException("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes.");
}
// we write the new cache entry to file system directly
@ -179,19 +170,19 @@ public final class HTTPLoader {
// check length again in case it was not possible to get the length before loading
if (maxFileSize >= 0 && contentLength > maxFileSize) {
this.log.logInfo("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "file size limit exceeded");
throw new IOException("REJECTED URL " + entry.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes.");
}
htCache.setCacheArray(responseBody);
} else {
// if the response has not the right file type then reject file
this.log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_WRONG_MIMETYPE_OR_EXT);
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension");
htCache = null;
}
return htCache;
/*
} catch (final SocketException e) {
// this may happen if the client suddenly closes its connection
// maybe the user has stopped loading
@ -201,7 +192,7 @@ public final class HTTPLoader {
this.log.logSevere("CRAWLER LOADER ERROR1: with URL=" + entry.url().toString() + ": " + e.toString());
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_CONNECTION_ERROR);
htCache = null;
}
}*/
} else if (res.getStatusLine().startsWith("30")) {
if (res.getResponseHeader().containsKey(httpRequestHeader.LOCATION)) {
// getting redirection URL
@ -209,9 +200,8 @@ public final class HTTPLoader {
redirectionUrlString = redirectionUrlString.trim();
if (redirectionUrlString.length() == 0) {
this.log.logWarning("CRAWLER Redirection of URL=" + entry.url().toString() + " aborted. Location header is empty.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_REDIRECTION_HEADER_EMPTY);
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "redirection header empty").store();
throw new IOException("CRAWLER Redirection of URL=" + entry.url().toString() + " aborted. Location header is empty.");
}
// normalizing URL
@ -223,9 +213,8 @@ public final class HTTPLoader {
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
this.log.logSevere("CRAWLER Retry of URL=" + entry.url().toString() + " aborted because of server shutdown.");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_SERVER_SHUTDOWN);
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "server shutdown");
throw new IOException("CRAWLER Retry of URL=" + entry.url().toString() + " aborted because of server shutdown.");
}
// generating url hash
@ -234,9 +223,8 @@ public final class HTTPLoader {
// check if the url was already indexed
final String dbname = sb.urlExists(urlhash);
if (dbname != null) {
this.log.logWarning("CRAWLER Redirection of URL=" + entry.url().toString() + " ignored. The url appears already in db " + dbname);
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_REDIRECTION_TO_DOUBLE_CONTENT);
return null;
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "redirection to double content");
throw new IOException("CRAWLER Redirection of URL=" + entry.url().toString() + " ignored. The url appears already in db " + dbname);
}
// retry crawling with new url
@ -248,16 +236,17 @@ public final class HTTPLoader {
this.log.logInfo("REJECTED WRONG STATUS TYPE '" + res.getStatusLine() + "' for URL " + entry.url().toString());
// not processed any further
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, ErrorURL.DENIED_WRONG_HTTP_STATUSCODE + res.getStatusCode() + ")");
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, "wrong http status code " + res.getStatusCode() + ")");
}
/*
} finally {
if(res != null) {
// release connection
res.closeStream();
}
}
}*/
return htCache;
/*
} catch (final Exception e) {
final String errorMsg = e.getMessage();
String failreason = null;
@ -340,7 +329,7 @@ public final class HTTPLoader {
sb.crawlQueues.errorURL.newEntry(entry, sb.webIndex.seedDB.mySeed().hash, new Date(), 1, failreason);
}
return null;
}
}*/
}
}

@ -26,6 +26,7 @@
package de.anomic.crawler;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
@ -68,14 +69,14 @@ public final class ProtocolLoader {
return (HashSet<String>) this.supportedProtocols.clone();
}
public indexDocumentMetadata load(final CrawlEntry entry, final String parserMode) {
// getting the protocol of the next URL
public indexDocumentMetadata load(final CrawlEntry entry, final String parserMode) throws IOException {
// getting the protocol of the next URL
final String protocol = entry.url().getProtocol();
final String host = entry.url().getHost();
// check if this loads a page from localhost, which must be prevented to protect the server
// against attacks to the administration interface when localhost access is granted
if (serverCore.isLocalhost(host) && sb.getConfigBool("adminAccountForLocalhost", false)) return null;
if (serverCore.isLocalhost(host) && sb.getConfigBool("adminAccountForLocalhost", false)) throw new IOException("access to localhost not granted for url " + entry.url());
// check access time
if (!entry.url().isLocal()) {
@ -102,8 +103,7 @@ public final class ProtocolLoader {
if ((protocol.equals("http") || (protocol.equals("https")))) return httpLoader.load(entry, parserMode);
if (protocol.equals("ftp")) return ftpLoader.load(entry);
this.log.logWarning("Unsupported protocol '" + protocol + "' in url " + entry.url());
return null;
throw new IOException("Unsupported protocol '" + protocol + "' in url " + entry.url());
}
public String process(final CrawlEntry entry, final String parserMode) {
@ -112,13 +112,14 @@ public final class ProtocolLoader {
indexDocumentMetadata h;
try {
h = load(entry, parserMode);
assert h != null;
entry.setStatus("loaded");
if (h == null) return "load failed";
final boolean stored = sb.htEntryStoreProcess(h);
entry.setStatus("stored-" + ((stored) ? "ok" : "fail"));
return (stored) ? null : "not stored";
} catch (final Exception e) {
log.logWarning("problem loading " + entry.url().toString(), e);
} catch (IOException e) {
entry.setStatus("error");
log.logWarning("problem loading " + entry.url().toString());
return "load error - " + e.getMessage();
}
}

@ -24,7 +24,6 @@
package de.anomic.plasma.parser;
import de.anomic.crawler.ErrorURL;
import de.anomic.yacy.yacyURL;
public class ParserException extends Exception
@ -39,7 +38,7 @@ public class ParserException extends Exception
}
public ParserException(final String message, final yacyURL url) {
this(message,url,ErrorURL.DENIED_PARSER_ERROR);
this(message,url, "parser error for url " + url.toString());
}
public ParserException(final String message, final yacyURL url, final String errorCode) {
@ -49,7 +48,7 @@ public class ParserException extends Exception
}
public ParserException(final String message, final yacyURL url, final Throwable cause) {
this(message,url,cause,ErrorURL.DENIED_PARSER_ERROR);
this(message,url,cause, "parser error for url " + url.toString());
}
public ParserException(final String message, final yacyURL url, final Throwable cause, final String errorCode) {

@ -39,7 +39,6 @@ import org.pdfbox.pdmodel.encryption.AccessPermission;
import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.pdfbox.util.PDFTextStripper;
import de.anomic.crawler.ErrorURL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
@ -107,7 +106,7 @@ public class pdfParser extends AbstractParser implements Parser {
theDocument.openProtection(new StandardDecryptionMaterial(""));
final AccessPermission perm = theDocument.getCurrentAccessPermission();
if (perm == null || !perm.canExtractContent())
throw new ParserException("Document is encrypted",location,ErrorURL.DENIED_DOCUMENT_ENCRYPTED);
throw new ParserException("Document is encrypted",location, "document is encrypted");
}
// extracting some metadata

@ -52,7 +52,6 @@ import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.anomic.crawler.ErrorURL;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterInputStream;
@ -546,7 +545,7 @@ public final class plasmaParser {
if (sourceArray == null || sourceArray.length == 0) {
final String errorMsg = "No resource content available (1).";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,ErrorURL.DENIED_NOT_PARSEABLE_NO_CONTENT);
throw new ParserException(errorMsg,location, "document has no content");
}
// creating an InputStream
@ -580,7 +579,7 @@ public final class plasmaParser {
if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) {
final String errorMsg = sourceFile.exists() ? "Empty resource file." : "No resource content available (2).";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,ErrorURL.DENIED_NOT_PARSEABLE_NO_CONTENT);
throw new ParserException(errorMsg,location, "document has no content");
}
// create a new InputStream
@ -634,7 +633,7 @@ public final class plasmaParser {
if (!plasmaParser.supportedContent(location,mimeType)) {
final String errorMsg = "No parser available to parse mimetype '" + mimeType + "'";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,ErrorURL.DENIED_WRONG_MIMETYPE_OR_EXT);
throw new ParserException(errorMsg,location, "wrong mime type or wrong extension");
}
if (this.theLogger.isFine())
@ -656,7 +655,7 @@ public final class plasmaParser {
} else {
final String errorMsg = "No parser available to parse mimetype '" + mimeType + "'";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,ErrorURL.DENIED_WRONG_MIMETYPE_OR_EXT);
throw new ParserException(errorMsg,location, "wrong mime type or wrong extension");
}
// check result
@ -668,9 +667,9 @@ public final class plasmaParser {
return doc;
} catch (final UnsupportedEncodingException e) {
final String errorMsg = "Unsupported charset encoding: " + e.getMessage();
final String errorMsg = "unsupported charset encoding: " + e.getMessage();
this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
throw new ParserException(errorMsg,location,ErrorURL.DENIED_UNSUPPORTED_CHARSET);
throw new ParserException(errorMsg,location, errorMsg);
} catch (final Exception e) {
// Interrupted- and Parser-Exceptions should pass through
if (e instanceof InterruptedException) throw (InterruptedException) e;

@ -26,7 +26,6 @@
package de.anomic.plasma;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;

@ -22,6 +22,7 @@
package de.anomic.plasma;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.HashMap;
@ -31,6 +32,7 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public final class plasmaSearchImages {
@ -41,7 +43,13 @@ public final class plasmaSearchImages {
final long start = System.currentTimeMillis();
this.images = new HashMap<String, htmlFilterImageEntry>();
if (maxTime > 10) {
final Object[] resource = plasmaSnippetCache.getResource(url, true, (int) maxTime, false, indexing);
Object[] resource = null;
try {
resource = plasmaSnippetCache.getResource(url, true, (int) maxTime, false, indexing);
} catch (IOException e) {
serverLog.logWarning("plasmaSearchImages", "cannot load: " + e.getMessage());
}
if (resource == null) return;
final InputStream res = (InputStream) resource[0];
final Long resLength = (Long) resource[1];
if (res != null) {
@ -51,6 +59,7 @@ public final class plasmaSearchImages {
document = plasmaSnippetCache.parseDocument(url, resLength.longValue(), res);
} catch (final ParserException e) {
// parsing failed
serverLog.logWarning("plasmaSearchImages", "cannot parse: " + e.getMessage());
} finally {
try { res.close(); } catch (final Exception e) {/* ignore this */}
}

@ -25,6 +25,7 @@
package de.anomic.plasma;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Enumeration;
@ -906,8 +907,9 @@ public class plasmaSnippetCache {
* <tr><td>[0]</td><td>the content as {@link InputStream}</td></tr>
* <tr><td>[1]</td><td>the content-length as {@link Integer}</td></tr>
* </table>
* @throws IOException
*/
public static Object[] getResource(final yacyURL url, final boolean fetchOnline, final int socketTimeout, final boolean forText, final boolean reindexing) {
public static Object[] getResource(final yacyURL url, final boolean fetchOnline, final int socketTimeout, final boolean forText, final boolean reindexing) throws IOException {
// load the url as resource from the web
long contentLength = -1;
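
A hedged usage sketch of the changed getResource() contract, mirroring the ViewImage and plasmaSearchImages hunks above (url and timeout are placeholders; not part of the commit): the caller now catches the IOException and, on success, unpacks the Object[] into the content stream and its length.

    Object[] resource = null;
    try {
        resource = plasmaSnippetCache.getResource(url, true, timeout, false, false);
    } catch (final IOException e) {
        // load failed; the method no longer returns an error string
    }
    if (resource != null) {
        final InputStream res = (InputStream) resource[0];   // [0] content as InputStream
        final Long resLength = (Long) resource[1];           // [1] content-length
        try {
            // ... parse or otherwise consume the stream here
        } finally {
            try { res.close(); } catch (final Exception e) {/* ignore this */}
        }
    }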

@ -112,7 +112,6 @@ import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlQueues;
import de.anomic.crawler.CrawlStacker;
import de.anomic.crawler.ErrorURL;
import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.ImporterManager;
import de.anomic.crawler.IndexingStack;
@ -1229,7 +1228,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
}
// check if the document should be indexed
String noIndexReason = ErrorURL.DENIED_UNSPECIFIED_INDEXING_ERROR;
String noIndexReason = "unspecified indexing error";
if (queueEntry.processCase() == plasmaSwitchboardConstants.PROCESSCASE_4_PROXY_LOAD) {
// proxy-load
noIndexReason = queueEntry.shallIndexCacheForProxy();
@ -1685,7 +1684,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// STORE WORD INDEX
if ((!queueEntry.profile().indexText()) && (!queueEntry.profile().indexMedia())) {
if (this.log.isFine()) log.logFine("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': process case=" + processCase);
addURLtoErrorDB(queueEntry.url(), referrerURL.hash(), queueEntry.initiator(), dc_title, ErrorURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE);
addURLtoErrorDB(queueEntry.url(), referrerURL.hash(), queueEntry.initiator(), dc_title, "unknown indexing process case " + processCase);
return;
}
@ -1768,7 +1767,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
InputStream resourceContent = null;
try {
// get the resource content
final Object[] resource = plasmaSnippetCache.getResource(comp.url(), fetchOnline, 10000, true, false);
Object[] resource = null;
try {
resource = plasmaSnippetCache.getResource(comp.url(), fetchOnline, 10000, true, false);
} catch (IOException e) {
serverLog.logWarning("removeAllUrlReferences", "cannot load: " + e.getMessage());
}
if (resource == null) {
// delete just the url entry
webIndex.removeURL(urlhash);

@ -38,6 +38,7 @@ import javax.imageio.ImageIO;
import de.anomic.index.indexDocumentMetadata;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
public class ymageOSM {
@ -79,7 +80,13 @@ public class ymageOSM {
InputStream tileStream = plasmaHTCache.getResourceContentStream(tileURL);
if (tileStream == null) {
// download resource using the crawler and keep resource in memory if possible
final indexDocumentMetadata entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false, false);
indexDocumentMetadata entry = null;
try {
entry = plasmaSwitchboard.getSwitchboard().crawlQueues.loadResourceFromWeb(tileURL, 20000, true, false, false);
} catch (IOException e) {
serverLog.logWarning("ymageOSM", "cannot load: " + e.getMessage());
return null;
}
if ((entry == null) || (entry.cacheArray() == null)) return null;
tileStream = new ByteArrayInputStream(entry.cacheArray());
}
