*) Parser now throws a ParserException instead of returning null on parsing errors (e.g. as needed by the snippet fetcher)

*) better logging of parser failures
*) simplified usage of plasmaParser through the switchboard
*) restructuring of the crawler
   - the crawler now returns an error message if it is used in sync mode (e.g. by the snippet fetcher)
*) snippet-fetcher: more verbose error messages
*) serverByteBuffer.java: added a new function append(String, encoding)
*) serverFileUtils.java: added functions to copy only a given number of bytes between streams (see the sketch after this list)
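Neither helper's implementation appears in the diff below, so here is a minimal sketch of what they plausibly look like. The sketch class names are hypothetical; only the copy(InputStream, File, long) call shape is confirmed by the tar/zip parser changes in this commit, and the append(String, charset) signature is an assumption based on the changelog entry above.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;

final class serverFileUtilsSketch {
    // copy at most 'count' bytes from source to dest; a negative count copies everything
    public static void copy(InputStream source, OutputStream dest, long count) throws IOException {
        byte[] buffer = new byte[4096];
        long remaining = count;
        while (remaining != 0) {
            int chunk = (remaining < 0) ? buffer.length : (int) Math.min(remaining, buffer.length);
            int len = source.read(buffer, 0, chunk);
            if (len < 0) break; // end of stream reached before 'count' bytes
            dest.write(buffer, 0, len);
            if (remaining > 0) remaining -= len;
        }
        dest.flush();
    }

    // same, but writing into a file (this is the variant the tar/zip parsers call)
    public static void copy(InputStream source, File dest, long count) throws IOException {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(dest);
            copy(source, fos, count);
        } finally {
            if (fos != null) try { fos.close(); } catch (Exception e) {/* ignore this */}
        }
    }
}

final class serverByteBufferSketch {
    private byte[] buffer = new byte[0];

    // append the bytes of a string encoded with the given charset
    public serverByteBufferSketch append(String s, String charset) throws UnsupportedEncodingException {
        return append(s.getBytes(charset));
    }

    public serverByteBufferSketch append(byte[] bytes) {
        byte[] grown = new byte[this.buffer.length + bytes.length];
        System.arraycopy(this.buffer, 0, grown, 0, this.buffer.length);
        System.arraycopy(bytes, 0, grown, this.buffer.length, bytes.length);
        this.buffer = grown;
        return this;
    }
}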


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2641 6c8d7289-2bf4-0310-a012-ef5d649a1542

@ -51,11 +51,11 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.net.URL;
import de.anomic.data.bookmarksDB;
import de.anomic.data.listManager;
import de.anomic.data.bookmarksDB.Tag;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;

@ -53,9 +53,11 @@ Unable to find URL Entry in DB
:: <!-- 3 -->
Invalid URL
:: <!-- 4 -->
Unable to download resource content.
Unable to download resource content.<br>
<tt>#[errorText]#</tt>
:: <!-- 5 -->
Unable to parse resource content.
Unable to parse resource content.<br>
<tt>#[errorText]#</tt>
:: <!-- 6 -->
Unsupported protocol.
#(/error)#

@ -57,6 +57,8 @@ import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.cache.IResourceInfo;
import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.parser.ParserException;
import de.anomic.plasma.plasmaCrawlLURL.Entry;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -83,174 +85,185 @@ public class ViewFile {
serverObjects prop = new serverObjects();
plasmaSwitchboard sb = (plasmaSwitchboard)env;
if (post.containsKey("words"))
if (post != null && post.containsKey("words"))
try {
prop.put("error_words",URLEncoder.encode((String) post.get("words"), "UTF-8"));
} catch (UnsupportedEncodingException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
// ignore this. this should not occur
}
if (post != null) {
// getting the url hash from which the content should be loaded
String urlHash = post.get("urlHash","");
if (urlHash.equals("")) {
prop.put("error",1);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
String viewMode = post.get("viewMode","sentences");
// getting the urlEntry that belongs to the url hash
Entry urlEntry = null;
urlEntry = sb.urlPool.loadedURL.load(urlHash, null);
if (urlEntry == null) {
prop.put("error",2);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// getting the url hash from which the content should be loaded
String urlHash = post.get("urlHash","");
if (urlHash.equals("")) {
prop.put("error",1);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// getting the url that belongs to the entry
URL url = urlEntry.url();
if (url == null) {
prop.put("error",3);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
String viewMode = post.get("viewMode","sentences");
// getting the urlEntry that belongs to the url hash
Entry urlEntry = null;
urlEntry = sb.urlPool.loadedURL.load(urlHash, null);
if (urlEntry == null) {
prop.put("error",2);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// getting the url that belongs to the entry
URL url = urlEntry.url();
if (url == null) {
prop.put("error",3);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// loading the resource content as byte array
byte[] resource = null;
IResourceInfo resInfo = null;
String resMime = null;
try {
// trying to load the resource body
resource = sb.cacheManager.loadResourceContent(url);
// if the resource body was not cached we try to load it from web
if (resource == null) {
plasmaHTCache.Entry entry = null;
try {
entry = sb.snippetCache.loadResourceFromWeb(url, 5000);
} catch (plasmaCrawlerException e) {
prop.put("error",4);
prop.put("error_errorText",e.getMessage());
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
// loading the resource content as byte array
byte[] resource = null;
IResourceInfo resInfo = null;
String resMime = null;
try {
// trying to load the resource body
resource = sb.cacheManager.loadResourceContent(url);
if (entry != null) {
resInfo = entry.getDocumentInfo();
resource = sb.cacheManager.loadResourceContent(url);
}
// if the resource body was not cached we try to load it from web
if (resource == null) {
plasmaHTCache.Entry entry = sb.snippetCache.loadResourceFromWeb(url, 5000);
prop.put("error",4);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
}
if (entry != null) {
resInfo = entry.getDocumentInfo();
resource = sb.cacheManager.loadResourceContent(url);
// try to load resource metadata
if (resInfo == null) {
// try to load the metadata from cache
try {
resInfo = sb.cacheManager.loadResourceInfo(urlEntry.url());
} catch (Exception e) { /* ignore this */}
// if the metadata were not cached try to load it from web
if (resInfo == null) {
String protocol = url.getProtocol();
if (!((protocol.equals("http") || protocol.equals("https")))) {
prop.put("error",6);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
if (resource == null) {
httpHeader responseHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig);
if (responseHeader == null) {
prop.put("error",4);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
resMime = responseHeader.mime();
}
} else {
resMime = resInfo.getMimeType();
}
} catch (IOException e) {
prop.put("error",4);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
if (viewMode.equals("plain")) {
String content = new String(resource);
content = content.replaceAll("<","&lt;")
.replaceAll(">","&gt;")
.replaceAll("\"","&quot;")
.replaceAll("\n","<br>")
.replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
// try to load resource metadata
if (resInfo == null) {
// try to load the metadata from cache
try {
resInfo = sb.cacheManager.loadResourceInfo(urlEntry.url());
} catch (Exception e) { /* ignore this */}
// if the metadata were not cached try to load it from web
if (resInfo == null) {
String protocol = url.getProtocol();
if (!((protocol.equals("http") || protocol.equals("https")))) {
prop.put("error",6);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
httpHeader responseHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig);
if (responseHeader == null) {
prop.put("error",4);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
resMime = responseHeader.mime();
}
} else {
resMime = resInfo.getMimeType();
}
} catch (IOException e) {
if (url == null) {
prop.put("error",4);
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
}
if (viewMode.equals("plain")) {
String content = new String(resource);
content = content.replaceAll("<","&lt;")
.replaceAll(">","&gt;")
.replaceAll("\"","&quot;")
.replaceAll("\n","<br>")
.replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
prop.put("error",0);
prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
prop.put("viewMode_plainText",content);
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
// parsing the resource content
plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource,resInfo);
prop.put("error",0);
prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT);
prop.put("viewMode_plainText",content);
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) {
// parsing the resource content
plasmaParserDocument document = null;
try {
document = sb.snippetCache.parseDocument(url, resource,resInfo);
if (document == null) {
prop.put("error",5);
prop.put("error_errorText","Unknown error");
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
resMime = document.getMimeType();
if (viewMode.equals("parsed")) {
String content = new String(document.getText());
content = wikiCode.replaceHTML(content); //added by Marc Nause
content = content.replaceAll("\n","<br>")
.replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText",content);
} else if (viewMode.equals("iframe")) {
prop.put("viewMode",VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url",url.toString());
} else {
prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
String[] sentences = document.getSentences();
boolean dark = true;
for (int i=0; i < sentences.length; i++) {
String currentSentence = wikiCode.replaceHTML(sentences[i]);
// Search word highlighting
String words = post.get("words",null);
if (words != null) {
try {
words = URLDecoder.decode(words,"UTF-8");
} catch (UnsupportedEncodingException e) {}
String[] wordArray = words.substring(1,words.length()-1).split(",");
for (int j=0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
currentSentence = currentSentence.replaceAll(currentWord,
"<b style=\"color: black; background-color: rgb(" + highlightingColors[j%6] + ");\">" + currentWord + "</b>");
}
}
} catch (ParserException e) {
prop.put("error",5);
prop.put("error_errorText",e.getMessage());
prop.put("viewMode",VIEW_MODE_NO_TEXT);
return prop;
}
resMime = document.getMimeType();
prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1));
prop.put("viewMode_sentences_" + i + "_text",currentSentence);
prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark;
if (viewMode.equals("parsed")) {
String content = new String(document.getText());
content = wikiCode.replaceHTML(content); //added by Marc Nause
content = content.replaceAll("\n","<br>")
.replaceAll("\t","&nbsp;&nbsp;&nbsp;&nbsp;");
prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT);
prop.put("viewMode_parsedText",content);
} else if (viewMode.equals("iframe")) {
prop.put("viewMode",VIEW_MODE_AS_IFRAME);
prop.put("viewMode_url",url.toString());
} else {
prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES);
String[] sentences = document.getSentences();
boolean dark = true;
for (int i=0; i < sentences.length; i++) {
String currentSentence = wikiCode.replaceHTML(sentences[i]);
// Search word highlighting
String words = post.get("words",null);
if (words != null) {
try {
words = URLDecoder.decode(words,"UTF-8");
} catch (UnsupportedEncodingException e) {}
String[] wordArray = words.substring(1,words.length()-1).split(",");
for (int j=0; j < wordArray.length; j++) {
String currentWord = wordArray[j].trim();
currentSentence = currentSentence.replaceAll(currentWord,
"<b style=\"color: black; background-color: rgb(" + highlightingColors[j%6] + ");\">" + currentWord + "</b>");
}
}
prop.put("viewMode_sentences",sentences.length);
}
}
prop.put("error",0);
prop.put("error_url",url.toString());
prop.put("error_hash",urlHash);
prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
prop.put("error_desc",urlEntry.descr());
prop.put("error_size",urlEntry.size());
prop.put("error_mimeType",resMime);
}
prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1));
prop.put("viewMode_sentences_" + i + "_text",currentSentence);
prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark;
}
prop.put("viewMode_sentences",sentences.length);
}
}
prop.put("error",0);
prop.put("error_url",url.toString());
prop.put("error_hash",urlHash);
prop.put("error_wordCount",Integer.toString(urlEntry.wordCount()));
prop.put("error_desc",urlEntry.descr());
prop.put("error_size",urlEntry.size());
prop.put("error_mimeType",resMime);
return prop;
}

@ -56,6 +56,7 @@ import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaSearchImages;
@ -64,7 +65,6 @@ import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.net.URL;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
@ -192,13 +192,15 @@ public class yacysearch {
plasmaCrawlLURL.Entry urlentry = sb.urlPool.loadedURL.load(recommendHash, null);
if (urlentry != null) {
plasmaParserDocument document = sb.snippetCache.retrieveDocument(urlentry.url(), true);
// create a news message
HashMap map = new HashMap();
map.put("url", urlentry.url().toNormalform().replace(',', '|'));
map.put("title", urlentry.descr().replace(',', ' '));
map.put("description", ((document == null) ? urlentry.descr() : document.getMainLongTitle()).replace(',', ' '));
map.put("tags", ((document == null) ? "" : document.getKeywords(' ')));
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map));
if (document != null) {
// create a news message
HashMap map = new HashMap();
map.put("url", urlentry.url().toNormalform().replace(',', '|'));
map.put("title", urlentry.descr().replace(',', ' '));
map.put("description", ((document == null) ? urlentry.descr() : document.getMainLongTitle()).replace(',', ' '));
map.put("tags", ((document == null) ? "" : document.getKeywords(' ')));
yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map));
}
}
}

@ -93,6 +93,8 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
protected plasmaCrawlProfile.entry profile;
protected boolean acceptAllContent;
protected String errorMessage;
/**
* The crawler thread pool
*/
@ -186,6 +188,8 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
}
public void execute() {
plasmaHTCache.Entry loadedResource = null;
try {
// setting threadname
this.setName(plasmaCrawlWorker.threadBaseName + "_" + this.url);
@ -194,15 +198,23 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
init();
// loading resource
plasmaHTCache.Entry resource = load();
loadedResource = load();
} catch (IOException e) {
//throw e;
} finally {
// setting the error message (if available)
if (this.errorMessage != null) {
this.theMsg.setError(this.errorMessage);
}
// store a reference to the result in the message object
// this is e.g. needed by the snippet fetcher
this.theMsg.setResult(resource);
} catch (IOException e) {
//throw e;
} finally {
// Note: this is always called, even on empty results.
// Otherwise the caller will block forever
this.theMsg.setResult(loadedResource);
// signal that this worker thread has finished the job
this.done = true;
}
}
@ -256,9 +268,13 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
this.startdate = 0;
this.profile = null;
this.acceptAllContent = false;
this.errorMessage = null;
}
protected void addURLtoErrorDB(String failreason) {
protected void addURLtoErrorDB(String failreason) {
// remember error message
this.errorMessage = failreason;
// convert the referrer URL into a hash value
String referrerHash = (this.refererURLString==null)?null:indexURL.urlHash(this.refererURLString);

@ -0,0 +1,9 @@
package de.anomic.plasma.crawler;
import java.io.IOException;
public class plasmaCrawlerException extends IOException {
public plasmaCrawlerException(String errorMsg) {
super(errorMsg);
}
}
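As a standalone illustration of the behavioral change (a hypothetical demo, not part of the commit): the synchronous loader now throws this exception instead of returning null, so the snippet fetcher can show the reason for a failed download.

// hypothetical demo of the new sync-mode error reporting
import java.io.IOException;

public class SyncLoadDemo {
    static class plasmaCrawlerException extends IOException {
        public plasmaCrawlerException(String errorMsg) { super(errorMsg); }
    }

    // stand-in for plasmaCrawlLoader.loadSync: throws instead of returning null
    static Object loadSync(String url) throws plasmaCrawlerException {
        throw new plasmaCrawlerException("no entry available: " + url);
    }

    public static void main(String[] args) {
        try {
            loadSync("http://example.net/");
        } catch (plasmaCrawlerException e) {
            // callers such as the snippet fetcher can now display the cause
            System.out.println("download failed: " + e.getMessage());
        }
    }
}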

@ -49,6 +49,7 @@ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import de.anomic.net.URL;
@ -93,12 +94,35 @@ public abstract class AbstractParser implements Parser{
this.libxDependencies = libxDependencies;
}
/**
* Check if the parser was interrupted.
* @throws InterruptedException if the parser was interrupted
*/
public static final void checkInterruption() throws InterruptedException {
Thread currentThread = Thread.currentThread();
if ((currentThread instanceof serverThread) && ((serverThread)currentThread).shutdownInProgress()) throw new InterruptedException("Shutdown in progress ...");
if (currentThread.isInterrupted()) throw new InterruptedException("Shutdown in progress ...");
}
public final File createTempFile(String name) throws IOException {
String parserClassName = this.getClass().getName();
int idx = parserClassName.lastIndexOf(".");
if (idx != -1) {
parserClassName = parserClassName.substring(idx+1);
}
// getting the file extension
idx = name.lastIndexOf("/");
String fileName = (idx != -1) ? name.substring(idx+1) : name;
idx = fileName.lastIndexOf(".");
String fileExt = (idx > -1) ? fileName.substring(idx+1) : "";
// creates the temp file
File tempFile = File.createTempFile(parserClassName + "_" + ((idx>-1)?fileName.substring(0,idx):fileName), (fileExt.length()>0)?"."+fileExt:fileExt);
return tempFile;
}
/**
* Parsing a document available as byte array.
* @param location the origin of the document
@ -119,14 +143,17 @@ public abstract class AbstractParser implements Parser{
) throws ParserException, InterruptedException {
ByteArrayInputStream contentInputStream = null;
try {
// convert the byte array into a stream
contentInputStream = new ByteArrayInputStream(source);
// parse the stream
return this.parse(location,mimeType,charset,contentInputStream);
} finally {
if (contentInputStream != null) {
try {
contentInputStream.close();
contentInputStream = null;
} catch (Exception e){}
} catch (Exception e){ /* ignore this */}
}
}
}
@ -151,12 +178,15 @@ public abstract class AbstractParser implements Parser{
) throws ParserException, InterruptedException {
BufferedInputStream contentInputStream = null;
try {
// create a stream from the file
contentInputStream = new BufferedInputStream(new FileInputStream(sourceFile));
// parse the stream
return this.parse(location, mimeType, charset, contentInputStream);
} catch (FileNotFoundException e) {
throw new ParserException(e.getMessage());
throw new ParserException("Unexpected error while parsing file. " + e.getMessage(),location);
} finally {
if (contentInputStream != null) try{contentInputStream.close();}catch(Exception e){}
if (contentInputStream != null) try{contentInputStream.close();}catch(Exception e){/* ignore this */}
}
}
@ -201,6 +231,6 @@ public abstract class AbstractParser implements Parser{
* Return the name of the parser
*/
public String getName() {
return parserName;
return this.parserName;
}
}

@ -44,24 +44,45 @@
package de.anomic.plasma.parser;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlEURL;
public class ParserException extends Exception
{
private String errorCode = null;
private URL url = null;
private static final long serialVersionUID = 1L;
public ParserException() {
super();
}
public ParserException(String message) {
public ParserException(String message, URL url) {
this(message,url,plasmaCrawlEURL.DENIED_PARSER_ERROR);
}
public ParserException(String message, URL url, String errorCode) {
super(message);
this.errorCode = errorCode;
this.url = url;
}
public ParserException(String message, Throwable cause) {
public ParserException(String message, URL url, Throwable cause) {
this(message,url,cause,plasmaCrawlEURL.DENIED_PARSER_ERROR);
}
public ParserException(String message, URL url, Throwable cause, String errorCode) {
super(message, cause);
this.errorCode = errorCode;
this.url = url;
}
public ParserException(Throwable cause) {
super(cause);
public String getErrorCode() {
return this.errorCode;
}
public URL getURL() {
return this.url;
}
}
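A minimal usage sketch, assembled from the parser hunks later in this diff (both lines appear there; this is not additional changed code): parsers attach the failing URL and, optionally, an EURL error code when they throw, and call sites can recover both for logging or the error DB.

// inside a parser: reject unsupported content with a specific error code
throw new ParserException(errorMsg, location, plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT);

// at a call site: URL and error code survive for logging
try {
    theDoc = theParser.parseSource(location, entryMime, null, tempFile);
} catch (ParserException e) {
    this.theLogger.logInfo("Unable to parse '" + e.getURL() + "'. " + e.getErrorCode());
}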

@ -80,7 +80,7 @@ public class bzipParser extends AbstractParser implements Parser {
public bzipParser() {
super(LIBX_DEPENDENCIES);
parserName = "Bzip 2 UNIX Compressed File Parser";
this.parserName = "Bzip 2 UNIX Compressed File Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -129,7 +129,9 @@ public class bzipParser extends AbstractParser implements Parser {
return theParser.parseSource(location,null,null,tempFile);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the gzip content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing bzip file. " + e.getMessage(),location);
} finally {
if (tempFile != null) tempFile.delete();
}

@ -75,7 +75,7 @@ implements Parser {
public docParser() {
super(LIBX_DEPENDENCIES);
parserName = "Word Document Parser";
this.parserName = "Word Document Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
@ -99,14 +99,16 @@ implements Parser {
null,
null,
null,
contents.getBytes(),
contents.getBytes("UTF-8"),
null,
null);
return theDoc;
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the doc content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing doc file. " + e.getMessage(),location);
}
}

@ -76,7 +76,7 @@ public class gzipParser extends AbstractParser implements Parser {
public gzipParser() {
super(LIBX_DEPENDENCIES);
parserName = "GNU Zip Compressed Archive Parser";
this.parserName = "GNU Zip Compressed Archive Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -113,7 +113,9 @@ public class gzipParser extends AbstractParser implements Parser {
return theParser.parseSource(location,null,null,tempFile);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the gzip content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing gzip file. " + e.getMessage(),location);
} finally {
if (tempFile != null) tempFile.delete();
}

@ -44,6 +44,7 @@
package de.anomic.plasma.parser.mimeType;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import de.anomic.net.URL;
import java.util.Collection;
@ -99,7 +100,7 @@ implements Parser {
public mimeTypeParser() {
super(LIBX_DEPENDENCIES);
parserName = "MimeType Parser";
this.parserName = "MimeType Parser";
}
public String getMimeType (File sourceFile) {
@ -142,8 +143,8 @@ implements Parser {
threadLoopDetection.put(Thread.currentThread(),new Integer(loopDepth.intValue()+1));
// deactivating the logging for jMimeMagic
Logger theLogger = Logger.getLogger("net.sf.jmimemagic");
theLogger.setLevel(Level.OFF);
Logger jmimeMagicLogger = Logger.getLogger("net.sf.jmimemagic");
jmimeMagicLogger.setLevel(Level.OFF);
Magic theMagic = new Magic();
MagicMatch match = theMagic.getMagicMatch(sourceFile);
@ -160,8 +161,8 @@ implements Parser {
}
// to avoid loops we have to test if the mimetype has changed ...
if (this.getSupportedMimeTypes().containsKey(mimeType)) return null;
if (orgMimeType.equals(mimeType)) return null;
if (this.getSupportedMimeTypes().containsKey(mimeType)) throw new ParserException("Unable to detect mimetype of resource.",location);
if (orgMimeType.equals(mimeType)) throw new ParserException("Unable to detect mimetype of resource.",location);
// check for interruption
checkInterruption();
@ -170,11 +171,13 @@ implements Parser {
plasmaParser theParser = new plasmaParser();
return theParser.parseSource(location,mimeType,charset,sourceFile);
}
return null;
throw new ParserException("Unable to detect mimetype of resource.",location);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
return null;
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while detect mimetype of resource. " + e.getMessage(),location);
} finally {
Integer loopDepth = (Integer) threadLoopDetection.get(Thread.currentThread());
if (loopDepth.intValue() <= 1) {
@ -186,14 +189,14 @@ implements Parser {
}
public plasmaParserDocument parse(URL location, String mimeType,String charset,
InputStream source) throws ParserException {
InputStream source) throws ParserException, InterruptedException {
File dstFile = null;
try {
dstFile = File.createTempFile("mimeTypeParser",".tmp");
serverFileUtils.copy(source,dstFile);
return parse(location,mimeType,charset,dstFile);
} catch (Exception e) {
return null;
} catch (IOException e) {
throw new ParserException("Unexpected error while detect mimetype of resource. " + e.getMessage(),location);
} finally {
if (dstFile != null) {dstFile.delete();}
}

@ -84,7 +84,7 @@ public class odtParser extends AbstractParser implements Parser {
public odtParser() {
super(LIBX_DEPENDENCIES);
parserName = "OASIS OpenDocument V2 Text Document Parser";
this.parserName = "OASIS OpenDocument V2 Text Document Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -96,7 +96,7 @@ public class odtParser extends AbstractParser implements Parser {
try {
byte[] docContent = null;
String docDescription = null;
String docKeywords = null;
String docKeywordStr = null;
String docShortTitle = null;
String docLongTitle = null;
@ -125,7 +125,7 @@ public class odtParser extends AbstractParser implements Parser {
ODFMetaFileAnalyzer metaAnalyzer = new ODFMetaFileAnalyzer();
OpenDocumentMetadata metaData = metaAnalyzer.analyzeMetaData(zipFileEntryStream);
docDescription = metaData.getDescription();
docKeywords = metaData.getKeyword();
docKeywordStr = metaData.getKeyword();
docShortTitle = metaData.getTitle();
docLongTitle = metaData.getSubject();
@ -149,11 +149,16 @@ public class odtParser extends AbstractParser implements Parser {
}
}
// split the keywords
String[] docKeywords = null;
if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,");
// create the parser document
return new plasmaParserDocument(
location,
mimeType,
"UTF-8",
docKeywords.split(" |,"),
docKeywords,
docShortTitle,
docLongTitle,
null,
@ -163,13 +168,13 @@ public class odtParser extends AbstractParser implements Parser {
null);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the odt content. " + e.getMessage());
} catch (Error e) {
throw new ParserException("Unable to parse the odt content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing odt file. " + e.getMessage(),location);
}
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException {
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
File dest = null;
try {
// creating a tempfile
@ -182,9 +187,12 @@ public class odtParser extends AbstractParser implements Parser {
// parsing the content
return parse(location, mimeType, charset, dest);
} catch (Exception e) {
throw new ParserException("Unable to parse the odt document. " + e.getMessage());
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing odt file. " + e.getMessage(),location);
} finally {
if (dest != null) try { dest.delete(); } catch (Exception e){}
if (dest != null) try { dest.delete(); } catch (Exception e){/* ignore this */}
}
}

@ -78,7 +78,7 @@ public class pdfParser extends AbstractParser implements Parser {
public pdfParser() {
super(LIBX_DEPENDENCIES);
parserName = "Acrobat Portable Document Parser";
this.parserName = "Acrobat Portable Document Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -98,7 +98,7 @@ public class pdfParser extends AbstractParser implements Parser {
// Logger theLogger = Logger.getLogger("org.pdfbox");
// theLogger.setLevel(Level.INFO);
String docTitle = null, docSubject = null, /*docAuthor = null,*/ docKeyWords = null;
String docTitle = null, docSubject = null, /*docAuthor = null,*/ docKeywordStr = null;
// check for interruption
checkInterruption();
@ -120,7 +120,7 @@ public class pdfParser extends AbstractParser implements Parser {
docTitle = theDocInfo.getTitle();
docSubject = theDocInfo.getSubject();
//docAuthor = theDocInfo.getAuthor();
docKeyWords = theDocInfo.getKeywords();
docKeywordStr = theDocInfo.getKeywords();
}
serverByteBuffer out = new serverByteBuffer();
@ -142,18 +142,14 @@ public class pdfParser extends AbstractParser implements Parser {
replaceAll("\t"," ");
}
/*
* public document(URL location, String mimeType,
String keywords, String shortTitle, String longTitle,
String[] sections, String abstrct,
byte[] text, Map anchors, Map images) {
*
*/
String[] docKeywords = null;
if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,");
plasmaParserDocument theDoc = new plasmaParserDocument(
location,
mimeType,
"UTF-8",
docKeyWords.split(" |,"),
docKeywords,
docSubject,
docTitle,
null,
@ -166,10 +162,12 @@ public class pdfParser extends AbstractParser implements Parser {
}
catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the pdf content. " + e.getMessage(),e);
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing pdf file. " + e.getMessage(),location);
} finally {
if (theDocument != null) try { theDocument.close(); } catch (Exception e) {}
if (writer != null) try { writer.close(); } catch (Exception e) {}
if (theDocument != null) try { theDocument.close(); } catch (Exception e) {/* ignore this */}
if (writer != null) try { writer.close(); } catch (Exception e) {/* ignore this */}
Thread.currentThread().setPriority(Thread.NORM_PRIORITY);
}
}

@ -84,7 +84,7 @@ public class rpmParser extends AbstractParser implements Parser {
public rpmParser() {
super(LIBX_DEPENDENCIES);
parserName = "rpm Parser";
this.parserName = "rpm Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -126,12 +126,12 @@ public class rpmParser extends AbstractParser implements Parser {
// getting the next tag
DataTypeIf tag = rpmFile.getTag(headerNames[i]);
if (tag != null) {
content.append(headerNames[i])
.append(": ")
.append(tag.toString())
.append("\n");
}
if (tag == null) continue;
content.append(headerNames[i])
.append(": ")
.append(tag.toString())
.append("\n");
if (headerNames[i].equals("N")) name = tag.toString();
else if (headerNames[i].equals("SUMMARY")) summary = tag.toString();
@ -153,16 +153,18 @@ public class rpmParser extends AbstractParser implements Parser {
summary,
null,
description,
content.toString().getBytes(),
content.toString().getBytes("UTF-8"),
anchors,
null);
return theDoc;
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the rpm file. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing rpm file. " + e.getMessage(),location);
} finally {
if (rpmFile != null) try { rpmFile.close(); } catch (Exception e) {}
if (rpmFile != null) try { rpmFile.close(); } catch (Exception e) {/* ignore this */}
}
}

@ -98,7 +98,7 @@ public class rssParser extends AbstractParser implements Parser {
public rssParser() {
super(LIBX_DEPENDENCIES);
parserName = "Rich Site Summary/Atom Feed Parser";
this.parserName = "Rich Site Summary/Atom Feed Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
@ -149,7 +149,7 @@ public class rssParser extends AbstractParser implements Parser {
anchors.put(itemURL.toString(),itemTitle);
if ((text.length() != 0) && (text.byteAt(text.length() - 1) != 32)) text.append((byte) 32);
text.append(new serverCharBuffer(htmlFilterAbstractScraper.stripAll(new serverCharBuffer(itemDescr.toCharArray()))).trim()).append(' '); // TODO: this does not work for utf-8
text.append(new serverCharBuffer(htmlFilterAbstractScraper.stripAll(new serverCharBuffer(itemDescr.toCharArray()))).trim().toString()).append(' ');
String itemContent = item.getElementValue("content");
if ((itemContent != null) && (itemContent.length() > 0)) {
@ -183,11 +183,6 @@ public class rssParser extends AbstractParser implements Parser {
}
}
/* (URL location, String mimeType,
String keywords, String shortTitle, String longTitle,
String[] sections, String abstrct,
byte[] text, Map anchors, Map images)
*/
plasmaParserDocument theDoc = new plasmaParserDocument(
location,
mimeType,
@ -205,7 +200,9 @@ public class rssParser extends AbstractParser implements Parser {
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the rss file. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing rss file." + e.getMessage(),location);
}
}

@ -77,7 +77,7 @@ implements Parser {
public rtfParser() {
super(LIBX_DEPENDENCIES);
parserName = "Rich Text Format Parser";
this.parserName = "Rich Text Format Parser";
}
public plasmaParserDocument parse(URL location, String mimeType, String charset,
@ -113,7 +113,9 @@ implements Parser {
}
catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the rdf content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing rtf resource." + e.getMessage(),location);
}
}

@ -87,7 +87,7 @@ public class tarParser extends AbstractParser implements Parser {
public tarParser() {
super(LIBX_DEPENDENCIES);
parserName = "Tape Archive File Parser";
this.parserName = "Tape Archive File Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -128,12 +128,11 @@ public class tarParser extends AbstractParser implements Parser {
// skip directories
if (entry.isDirectory()) continue;
// Get the entry name
int idx = -1;
// Get the short entry name
String entryName = entry.getName();
idx = entryName.lastIndexOf("/");
if (idx != -1) entryName = entryName.substring(idx+1);
idx = entryName.lastIndexOf(".");
// getting the entry file extension
int idx = entryName.lastIndexOf(".");
String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
// trying to determine the mimeType per file extension
@ -143,19 +142,21 @@ public class tarParser extends AbstractParser implements Parser {
plasmaParserDocument theDoc = null;
File tempFile = null;
try {
byte[] buf = new byte[(int) entry.getSize()];
/*int bytesRead =*/ tin.read(buf);
tempFile = File.createTempFile("tarParser_" + ((idx>-1)?entryName.substring(0,idx):entryName), (entryExt.length()>0)?"."+entryExt:entryExt);
serverFileUtils.write(buf, tempFile);
// create the temp file
tempFile = createTempFile(entryName);
// copy the data into the file
serverFileUtils.copy(tin,tempFile,entry.getSize());
// check for interruption
checkInterruption();
// parsing the content
theDoc = theParser.parseSource(new URL(tempFile),entryMime,null,tempFile);
theDoc = theParser.parseSource(new URL(location,"#" + entryName),entryMime,null,tempFile);
} catch (ParserException e) {
this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getErrorCode());
} finally {
if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){}
if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){/* ignore this */}
}
if (theDoc == null) continue;
@ -200,7 +201,9 @@ public class tarParser extends AbstractParser implements Parser {
docImages);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the zip content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing tar resource. " + e.getMessage(),location);
}
}

@ -215,7 +215,7 @@ public class vcfParser extends AbstractParser implements Parser {
URL newURL = new URL(value);
anchors.put(newURL.toString(),newURL.toString());
//parsedData.put(key,value);
} catch (MalformedURLException ex) {}
} catch (MalformedURLException ex) {/* ignore this */}
} else if (
!key.equalsIgnoreCase("BEGIN") &&
!key.equalsIgnoreCase("END") &&
@ -255,12 +255,10 @@ public class vcfParser extends AbstractParser implements Parser {
return theDoc;
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof ParserException) throw (ParserException) e;
String errorMsg = "Unable to parse the vcard content. " + e.getMessage();
this.theLogger.logSevere(errorMsg);
throw new ParserException(errorMsg);
} finally {
}
throw new ParserException("Unexpected error while parsing vcf resource. " + e.getMessage(),location);
}
}
public void reset() {

@ -43,9 +43,8 @@
package de.anomic.plasma.parser.zip;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import de.anomic.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Hashtable;
@ -55,12 +54,14 @@ import java.util.TreeSet;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverFileUtils;
public class zipParser extends AbstractParser implements Parser {
@ -84,7 +85,7 @@ public class zipParser extends AbstractParser implements Parser {
public zipParser() {
super(LIBX_DEPENDENCIES);
parserName = "Compressed Archive File Parser";
this.parserName = "Compressed Archive File Parser";
}
public Hashtable getSupportedMimeTypes() {
@ -110,29 +111,39 @@ public class zipParser extends AbstractParser implements Parser {
ZipEntry entry;
ZipInputStream zippedContent = new ZipInputStream(source);
while ((entry = zippedContent.getNextEntry()) !=null) {
// check for interruption
checkInterruption();
// skip directories
if (entry.isDirectory()) continue;
// Get the entry name
String entryName = entry.getName();
int idx = entryName.lastIndexOf(".");
String entryExt = (idx > -1) ? entryName.substring(idx+1) : null;
// trying to determine the mimeType per file extension
String entryMime = plasmaParser.getMimeTypeByFileExt(entryExt);
// getting the entry content
ByteArrayOutputStream bos = new ByteArrayOutputStream();
byte[] buf = new byte[(int) entry.getSize()];
/*int bytesRead =*/ zippedContent.read(buf);
bos.write(buf);
byte[] ut = bos.toByteArray();
// getting the file extension
String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
// check for interruption
checkInterruption();
// trying to determine the mimeType per file extension
String entryMime = plasmaParser.getMimeTypeByFileExt(entryExt);
// parsing the content
plasmaParserDocument theDoc = theParser.parseSource(location,entryMime,null, ut);
plasmaParserDocument theDoc = null;
File tempFile = null;
try {
// create the temp file
tempFile = createTempFile(entryName);
// copy the data into the file
serverFileUtils.copy(zippedContent,tempFile,entry.getSize());
// parsing the zip file entry
theDoc = theParser.parseSource(new URL(location,"#" + entryName),entryMime,null, tempFile);
} catch (ParserException e) {
this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. " + e.getErrorCode());
} finally {
if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){/* ignore this */}
}
if (theDoc == null) continue;
// merging all documents together
@ -157,11 +168,7 @@ public class zipParser extends AbstractParser implements Parser {
docImages.addAll(theDoc.getImages());
}
/* (URL location, String mimeType,
String keywords, String shortTitle, String longTitle,
String[] sections, String abstrct,
byte[] text, Map anchors, Map images)
*/
return new plasmaParserDocument(
location,
mimeType,
@ -176,9 +183,9 @@ public class zipParser extends AbstractParser implements Parser {
docImages);
} catch (Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
throw new ParserException("Unable to parse the zip content. " + e.getMessage());
} catch (Error e) {
throw new ParserException("Unable to parse the zip content. " + e.getMessage());
if (e instanceof ParserException) throw (ParserException) e;
throw new ParserException("Unexpected error while parsing zip resource. " + e.getMessage(),location);
}
}

@ -52,6 +52,7 @@ import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.net.URL;
import de.anomic.plasma.crawler.plasmaCrawlWorker;
import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.crawler.plasmaCrawlerFactory;
import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue;
import de.anomic.plasma.crawler.plasmaCrawlerPool;
@ -83,7 +84,7 @@ public final class plasmaCrawlLoader extends Thread {
// supported protocols
// TODO: change this, e.g. by loading settings from file
this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https" /* ,"ftp" */}));
this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https"/* ,"ftp" */}));
// configuring the crawler messagequeue
this.theQueue = new plasmaCrawlerMsgQueue();
@ -99,6 +100,8 @@ public final class plasmaCrawlLoader extends Thread {
// The maximum number of idle connections connections in the pool
// 0 = no limit.
this.crawlerPoolConfig.maxIdle = Integer.parseInt(switchboard.getConfig("crawler.MaxIdleThreads","7"));
// minIdle configuration not possible for keyedObjectPools
//this.crawlerPoolConfig.minIdle = Integer.parseInt(switchboard.getConfig("crawler.MinIdleThreads","5"));
// block indefinitely
@ -216,7 +219,7 @@ public final class plasmaCrawlLoader extends Thread {
int depth,
plasmaCrawlProfile.entry profile,
int timeout
) {
) throws plasmaCrawlerException {
plasmaHTCache.Entry result = null;
if (!this.crawlwerPool.isClosed) {
@ -241,11 +244,17 @@ public final class plasmaCrawlLoader extends Thread {
this.execute(theMsg);
// wait for the crawl job result
result = theMsg.waitForResult();
result = theMsg.waitForResult();
} catch (Exception e) {
this.log.logSevere("plasmaCrawlLoader.loadSync", e);
this.log.logSevere("plasmaCrawlLoader.loadSync: Unexpected error", e);
throw new plasmaCrawlerException("Unexpected error: " + e.getMessage());
}
// check if an error has occured
if (result == null) {
String errorMsg = theMsg.getError();
throw new plasmaCrawlerException(errorMsg);
}
}
// return the result

@ -59,6 +59,7 @@ public final class plasmaCrawlLoaderMessage {
private serverSemaphore resultSync = null;
private plasmaHTCache.Entry result;
private String errorMessage;
// loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) {
public plasmaCrawlLoaderMessage(
@ -86,6 +87,14 @@ public final class plasmaCrawlLoaderMessage {
this.result = null;
}
public void setError(String errorMessage) {
this.errorMessage = errorMessage;
}
public String getError() {
return this.errorMessage;
}
public void setResult(plasmaHTCache.Entry theResult) {
// store the result
this.result = theResult;

@ -73,12 +73,14 @@ import de.anomic.htmlFilter.htmlFilterInputStream;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.index.indexURL;
import de.anomic.net.URL;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.plasma.parser.ParserInfo;
import de.anomic.server.serverFileUtils;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;
public final class plasmaParser {
public static final String PARSER_MODE_PROXY = "PROXY";
@ -407,7 +409,7 @@ public final class plasmaParser {
if (neededLibx != null) {
for (int libxId=0; libxId < neededLibx.length; libxId++) {
if (javaClassPath.indexOf(neededLibx[libxId]) == -1) {
throw new ParserException("Missing dependency detected: '" + neededLibx[libxId] + "'.");
throw new Exception("Missing dependency detected: '" + neededLibx[libxId] + "'.");
}
neededLibxBuf.append(neededLibx[libxId])
.append(",");
@ -464,42 +466,67 @@ public final class plasmaParser {
// closing the parser object pool
try {
theParserPool.close();
} catch (Exception e) { }
} catch (Exception e) {/* ignore this */}
}
public plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] source) throws InterruptedException {
public plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] source)
throws InterruptedException, ParserException {
File tempFile = null;
try {
// creating a temp file to store the byte array
tempFile = File.createTempFile("parseSource", ".tmp");
serverFileUtils.write(source, tempFile);
// parsing the temp file
return parseSource(location, mimeType, charset, tempFile);
} catch (Exception e) {
// Interrupted- and Parser-Exceptions should pass through
if (e instanceof InterruptedException) throw (InterruptedException) e;
serverLog.logSevere("PARSER", "parseSource1: " + e.getMessage(), e);
return null;
if (e instanceof ParserException) throw (ParserException) e;
// log unexpected error
this.theLogger.logSevere("Unexpected exception in parseSource1: " + e.getMessage(), e);
throw new ParserException("Unexpected exception while parsing " + location,location, e);
} finally {
if (tempFile != null) try { tempFile.delete(); } catch (Exception ex){}
if (tempFile != null) try { tempFile.delete(); } catch (Exception ex){/* ignore this */}
}
}
public plasmaParserDocument parseSource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException {
public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, File sourceFile)
throws InterruptedException, ParserException {
Parser theParser = null;
String mimeType = null;
try {
// getting the mimetype of the document
mimeType = getRealMimeType(mimeType);
mimeType = getRealMimeType(theMimeType);
// getting the file extension of the document
String fileExt = getFileExt(location);
// getting the charset of the document
if (documentCharset == null)
// TODO: do a charset detection here ....
documentCharset = "ISO-8859-1";
// TODO: do a charset detection here ....
String documentCharset = (theDocumentCharset == null) ? "ISO-8859-1" : theDocumentCharset;
// testing if parsing is supported for this resource
if (!plasmaParser.supportedContent(location,mimeType)) {
String errorMsg = "No parser available to parse mimetype";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT);
}
// testing if the resource is not empty
if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) {
String errorMsg = "No resource content available.";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_NOT_PARSEABLE_NO_CONTENT);
}
if (this.theLogger.isFine())
this.theLogger.logFine("Parsing " + location + " with mimeType '" + mimeType +
this.theLogger.logInfo("Parsing " + location + " with mimeType '" + mimeType +
"' and file extension '" + fileExt + "'.");
/*
@ -555,26 +582,43 @@ public final class plasmaParser {
theParser = this.getParser(mimeType);
// if a parser was found we use it ...
plasmaParserDocument doc = null;
if (theParser != null) {
return theParser.parse(location, mimeType,documentCharset,sourceFile);
doc = theParser.parse(location, mimeType,documentCharset,sourceFile);
} else if (realtimeParsableMimeTypesContains(mimeType)) {
return parseHtml(location, mimeType, documentCharset, sourceFile);
doc = parseHtml(location, mimeType, documentCharset, sourceFile);
} else {
serverLog.logWarning("PARSER", "parseSource2: wrong mime type");
return null;
String errorMsg = "No parser available to parse mimetype";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT);
}
// check result
if (doc == null) {
String errorMsg = "Unexpected error. Parser returned null.";
this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location);
}
return doc;
} catch (Exception e) {
// Interrupted- and Parser-Exceptions should pass through
if (e instanceof InterruptedException) throw (InterruptedException) e;
serverLog.logSevere("PARSER", "parseSource2: " + e.getMessage(), e);
return null;
if (e instanceof ParserException) throw (ParserException) e;
// log unexpected error
String errorMsg = "Unexpected exception. " + e.getMessage();
this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
throw new ParserException(errorMsg,location,e);
} finally {
if (theParser != null) {
try { plasmaParser.theParserPool.returnObject(mimeType, theParser); } catch (Exception e) { }
try { plasmaParser.theParserPool.returnObject(mimeType, theParser); } catch (Exception e) { /* ignore this */}
}
}
}
private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, File sourceFile) throws IOException {
private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, File sourceFile) throws IOException, ParserException {
// ...otherwise we make a scraper and transformer
FileInputStream fileIn = new FileInputStream(sourceFile);
@ -596,8 +640,9 @@ public final class plasmaParser {
//serverFileUtils.copy(sourceFile, hfos);
//hfos.close();
if (writer.binarySuspect()) {
this.theLogger.logInfo("Binary data found in URL " + location);
return null;
String errorMsg = "Binary data found in resource";
this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg);
throw new ParserException(errorMsg,location);
}
return transformScraper(location, mimeType, documentCharset, scraper);
}

@ -43,6 +43,8 @@ package de.anomic.plasma;
import java.net.MalformedURLException;
import de.anomic.net.URL;
import de.anomic.plasma.parser.ParserException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
@ -60,10 +62,16 @@ public final class plasmaSearchImages {
if (maxTime > 10) {
byte[] res = sc.getResource(url, true, (int) maxTime);
if (res != null) {
plasmaParserDocument document = sc.parseDocument(url, res);
plasmaParserDocument document = null;
try {
document = sc.parseDocument(url, res);
} catch (ParserException e) {
// parsing failed
}
if (document == null) return;
// add the image links
if (document != null) this.addAll(document.getImages());
this.addAll(document.getImages());
// add also links from pages one step deeper, if depth > 0
if (depth > 0) {

@ -45,6 +45,8 @@ package de.anomic.plasma;
import java.io.IOException;
import de.anomic.net.URL;
import de.anomic.plasma.cache.IResourceInfo;
import de.anomic.plasma.crawler.plasmaCrawlerException;
import de.anomic.plasma.parser.ParserException;
import java.util.Enumeration;
import java.util.HashMap;
@ -164,30 +166,51 @@ public class plasmaSnippetCache {
return new Snippet(line, source, null);
}
/* ===========================================================================
* LOADING RESOURCE DATA
* =========================================================================== */
// if the snippet is not in the cache, we can try to get it from the htcache
byte[] resource = null;
IResourceInfo docInfo = null;
try {
// trying to load the resource from the cache
resource = this.cacheManager.loadResourceContent(url);
if ((fetchOnline) && (resource == null)) {
docInfo = this.cacheManager.loadResourceInfo(url);
// if not found try to download it
if ((resource == null) && (fetchOnline)) {
// download resource using the crawler
plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000);
// getting resource metadata (e.g. the http headers for http resources)
if (entry != null) {
docInfo = entry.getDocumentInfo();
}
// now the resource should be stored in the cache, load body
resource = this.cacheManager.loadResourceContent(url);
if (resource == null) {
//System.out.println("cannot load document for URL " + url);
return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL");
}
source = SOURCE_WEB;
}
} catch (IOException e) {
e.printStackTrace();
} catch (Exception e) {
if (!(e instanceof plasmaCrawlerException)) e.printStackTrace();
return new Snippet(null, ERROR_SOURCE_LOADING, "error loading resource from web: " + e.getMessage());
}
if (resource == null) {
//System.out.println("cannot load document for URL " + url);
return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL");
}
plasmaParserDocument document = parseDocument(url, resource, docInfo);
/* ===========================================================================
* PARSING RESOURCE
* =========================================================================== */
plasmaParserDocument document = null;
try {
document = parseDocument(url, resource, docInfo);
} catch (ParserException e) {
return new Snippet(null, ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
}
if (document == null) return new Snippet(null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
//System.out.println("loaded document for URL " + url);
String[] sentences = document.getSentences();
//System.out.println("----" + url.toString()); for (int l = 0; l < sentences.length; l++) System.out.println(sentences[l]);
@ -196,6 +219,9 @@ public class plasmaSnippetCache {
return new Snippet(null, ERROR_PARSER_NO_LINES, "parser returned no sentences");
}
/* ===========================================================================
* COMPUTE SNIPPET
* =========================================================================== */
// we have found a parseable non-empty file: use the lines
line = computeSnippet(sentences, queryhashes, 8 + 6 * queryhashes.size(), snippetMaxLength);
//System.out.println("loaded snippet for URL " + url + ": " + line);
@ -207,22 +233,48 @@ public class plasmaSnippetCache {
return new Snippet(line, source, null);
}
/**
* Tries to load and parse a resource specified by its URL.
* If the resource is not stored in the cache and fetchOnline is set,
* this function tries to download the resource from the web.
*
* @param url the URL of the resource
* @param fetchOnline specifies if the resource should be loaded from the web if it is not available in the cache
* @return the parsed document as {@link plasmaParserDocument}
*/
public plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline) {
byte[] resource = null;
IResourceInfo docInfo = null;
try {
// trying to load the resource body from cache
resource = this.cacheManager.loadResourceContent(url);
// if not available try to load resource from web
if ((fetchOnline) && (resource == null)) {
// download resource using crawler
plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000);
// fetching metadata of the resource (e.g. http headers for http resource)
if (entry != null) docInfo = entry.getDocumentInfo();
// getting the resource body from the cache
resource = this.cacheManager.loadResourceContent(url);
} else {
// trying to load resource metadata
docInfo = this.cacheManager.loadResourceInfo(url);
}
} catch (IOException e) {
e.printStackTrace();
// parsing document
if (resource == null) return null;
return parseDocument(url, resource, docInfo);
} catch (ParserException e) {
this.log.logWarning("Unable to parse resource. " + e.getMessage());
return null;
} catch (Exception e) {
this.log.logWarning("Unexpected error while retrieving document. " + e.getMessage(),e);
return null;
}
if (resource == null) return null;
return parseDocument(url, resource, docInfo);
}
public void storeToCache(String wordhashes, String urlhash, String snippet) {
@ -374,11 +426,11 @@ public class plasmaSnippetCache {
return map;
}
public plasmaParserDocument parseDocument(URL url, byte[] resource) {
public plasmaParserDocument parseDocument(URL url, byte[] resource) throws ParserException {
return parseDocument(url, resource, null);
}
public plasmaParserDocument parseDocument(URL url, byte[] resource, IResourceInfo docInfo) {
public plasmaParserDocument parseDocument(URL url, byte[] resource, IResourceInfo docInfo) throws ParserException {
try {
if (resource == null) return null;
@ -425,9 +477,15 @@ public class plasmaSnippetCache {
public byte[] getResource(URL url, boolean fetchOnline, int socketTimeout) {
// load the url as resource from the web
try {
// trying to load the resource body from cache
byte[] resource = cacheManager.loadResourceContent(url);
// if the content is not available in cache try to download it from web
if ((fetchOnline) && (resource == null)) {
// try to download the resource using a crawler
loadResourceFromWeb(url, (socketTimeout < 0) ? -1 : socketTimeout);
// get the content from cache
resource = cacheManager.loadResourceContent(url);
}
return resource;
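A hedged sketch of how getResource and the now-throwing parseDocument combine, mirroring the call in removeAllUrlReferences further down; the surrounding variables are assumptions:

try {
    // cache first, then web if fetchOnline is true, with a 10 second socket timeout
    byte[] content = snippetCache.getResource(url, true, 10000);
    if (content != null) {
        plasmaParserDocument document = snippetCache.parseDocument(url, content);
    }
} catch (ParserException e) {
    // new checked exception: each caller decides whether a parse failure is fatal
}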
@ -436,7 +494,7 @@ public class plasmaSnippetCache {
}
}
public plasmaHTCache.Entry loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
public plasmaHTCache.Entry loadResourceFromWeb(URL url, int socketTimeout) throws plasmaCrawlerException {
plasmaHTCache.Entry result = this.sb.cacheLoader.loadSync(
url,
@ -144,6 +144,7 @@ import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroMapTable;
import de.anomic.plasma.dbImport.dbImportManager;
import de.anomic.plasma.parser.ParserException;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings;
@ -1392,7 +1393,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
private plasmaParserDocument parseResource(plasmaSwitchboardQueue.Entry entry, String initiatorHash) throws InterruptedException {
private plasmaParserDocument parseResource(plasmaSwitchboardQueue.Entry entry, String initiatorHash) throws InterruptedException, ParserException {
plasmaParserDocument document = null;
// the mimetype of this entry
@ -1402,29 +1403,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// the parser logger
serverLog parserLogger = parser.getLogger();
// if the document content is supported we can start to parse the content
if (plasmaParser.supportedContent(
entry.url(),
mimeType)
){
if ((entry.cacheFile().exists()) && (entry.cacheFile().length() > 0)) {
parserLogger.logFine("'" + entry.normalizedURLString() + "' is not parsed yet, parsing now from File");
document = parser.parseSource(entry.url(), mimeType, charset, entry.cacheFile());
} else {
parserLogger.logFine("'" + entry.normalizedURLString() + "' cannot be parsed, no resource available");
addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_NOT_PARSEABLE_NO_CONTENT, new bitfield(indexURL.urlFlagLength));
}
if (document == null) {
parserLogger.logSevere("'" + entry.normalizedURLString() + "' parse failure");
addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_PARSER_ERROR, new bitfield(indexURL.urlFlagLength));
}
} else {
parserLogger.logFine("'" + entry.normalizedURLString() + "'. Unsupported mimeType '" + ((mimeType == null) ? "null" : mimeType) + "'.");
addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT, new bitfield(indexURL.urlFlagLength));
}
checkInterruption();
return document;
// parse the document
return parseResource(entry.url(), mimeType, charset, entry.cacheFile());
}
public plasmaParserDocument parseResource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException {
plasmaParserDocument doc = parser.parseSource(location, mimeType, documentCharset, sourceFile);
assert(doc != null) : "Unexpected error. Parser returned null.";
return doc;
}
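A sketch of the new public parseResource helper in use — the mime type, charset, and file are placeholders; e.getErrorCode() is the same accessor used in processResourceStack below:

try {
    plasmaParserDocument doc = sb.parseResource(location, "text/html", "UTF-8", cacheFile);
} catch (ParserException e) {
    // e.getErrorCode() maps to a plasmaCrawlEURL error constant for the error DB
} catch (InterruptedException e) {
    // shutdown in progress
}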
private void processResourceStack(plasmaSwitchboardQueue.Entry entry) throws InterruptedException {
@ -1471,8 +1457,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
plasmaParserDocument document = null;
parsingStartTime = System.currentTimeMillis();
try {
document = this.parseResource(entry, initiatorPeerHash);
if (document == null) return;
} catch (ParserException e) {
this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield(indexURL.urlFlagLength));
return;
}
parsingEndTime = System.currentTimeMillis();
@ -2172,16 +2164,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// determine the url string
plasmaCrawlLURL.Entry entry = urlPool.loadedURL.load(urlhash, null);
if (entry == null) return 0;
URL url = entry.url();
if (url == null) return 0;
// get set of words
// Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline)));
Iterator witer = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline, 10000)).getText());
// delete all word references
int count = removeReferences(urlhash, witer);
// finally delete the url entry itself
urlPool.loadedURL.remove(urlhash);
return count;
try {
// get set of words
// Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline)));
Iterator witer = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline, 10000)).getText());
// delete all word references
int count = removeReferences(urlhash, witer);
// finally delete the url entry itself
urlPool.loadedURL.remove(urlhash);
return count;
} catch (ParserException e) {
return 0;
}
}
public int removeReferences(URL url, Set words) {
@ -188,6 +188,10 @@ public final class serverByteBuffer extends OutputStream {
public serverByteBuffer append(String s) {
return append(s.getBytes());
}
public serverByteBuffer append(String s, String charset) throws UnsupportedEncodingException {
return append(s.getBytes(charset));
}
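A short usage sketch for the new encoding-aware append — the default constructor is an assumption; the existing append(String) falls back to the platform encoding:

serverByteBuffer buffer = new serverByteBuffer();
try {
    buffer.append("snippet text with umlauts: äöü", "UTF-8");
} catch (UnsupportedEncodingException e) {
    // only thrown for unknown charset names
}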
public serverByteBuffer append(serverByteBuffer bb) {
return append(bb.buffer, bb.offset, bb.length);
@ -73,24 +73,39 @@ import de.anomic.kelondro.kelondroRowSet;
public final class serverFileUtils {
private static final int DEFAULT_BUFFER_SIZE = 4096;
public static long copy(InputStream source, OutputStream dest) throws IOException {
return copy(source, dest, -1);
}
/**
* Copies an InputStream to an OutputStream.
* @param source InputStream
* @param dest OutputStream
* @param count the total amount of bytes to copy
* @return Total number of bytes copied.
*
* @see copy(InputStream source, File dest)
* @see copyRange(File source, OutputStream dest, int start)
* @see copy(File source, OutputStream dest)
* @see copy(File source, File dest)
*/
public static int copy(InputStream source, OutputStream dest) throws IOException {
byte[] buffer = new byte[4096];
public static long copy(InputStream source, OutputStream dest, long count) throws IOException {
byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
int chunkSize = (int) ((count > 0) ? Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE);
int c, total = 0;
while ((c = source.read(buffer)) > 0) {
int c; long total = 0;
while ((c = source.read(buffer,0,chunkSize)) > 0) {
dest.write(buffer, 0, c);
dest.flush();
total += c;
if (count > 0) {
chunkSize = (int)Math.min(count-total,DEFAULT_BUFFER_SIZE);
if (chunkSize == 0) break;
}
}
dest.flush();
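A sketch of the bounded copy above — the file name is a placeholder; a count <= 0 preserves the old copy-everything behaviour:

InputStream in = new FileInputStream("resource.bin");
try {
    ByteArrayOutputStream head = new ByteArrayOutputStream();
    long copied = serverFileUtils.copy(in, head, 1024); // copies at most the first 1024 bytes
} finally {
    in.close();
}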
@ -165,21 +180,26 @@ public final class serverFileUtils {
}
return count;
}
public static void copy(InputStream source, File dest) throws IOException {
copy(source,dest,-1);
}
/**
* Copies an InputStream to a File.
* @param source InputStream
* @param dest File
* @param count the amount of bytes to copy
* @see copy(InputStream source, OutputStream dest)
* @see copyRange(File source, OutputStream dest, int start)
* @see copy(File source, OutputStream dest)
* @see copy(File source, File dest)
*/
public static void copy(InputStream source, File dest) throws IOException {
public static void copy(InputStream source, File dest, long count) throws IOException {
FileOutputStream fos = null;
try {
fos = new FileOutputStream(dest);
copy(source, fos);
copy(source, fos, count);
} finally {
if (fos != null) try {fos.close();} catch (Exception e) {}
}
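And the File-targeted variant, e.g. for materializing only a prefix of a stream on disk — the paths are placeholders:

InputStream in = new FileInputStream("source.dat"); // placeholder path
try {
    serverFileUtils.copy(in, new File("preview.part"), 4096); // keeps only the first 4096 bytes
} finally {
    in.close();
}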
@ -201,7 +221,7 @@ public final class serverFileUtils {
fis = new FileInputStream(source);
long skipped = fis.skip(start);
if (skipped != start) throw new IllegalStateException("Unable to skip '" + start + "' bytes. Only '" + skipped + "' bytes skipped.");
copy(fis, dest);
copy(fis, dest,-1);
} finally {
if (fis != null) try { fis.close(); } catch (Exception e) {}
}
@ -220,28 +240,33 @@ public final class serverFileUtils {
InputStream fis = null;
try {
fis = new FileInputStream(source);
copy(fis, dest);
copy(fis, dest, -1);
} finally {
if (fis != null) try { fis.close(); } catch (Exception e) {}
}
}
public static void copy(File source, File dest) throws IOException {
copy(source,dest,-1);
}
/**
* Copies a File to a File.
* @param source File
* @param dest File
* @param count the amount of bytes to copy
* @see copy(InputStream source, OutputStream dest)
* @see copy(InputStream source, File dest)
* @see copyRange(File source, OutputStream dest, int start)
* @see copy(File source, OutputStream dest)
*/
public static void copy(File source, File dest) throws IOException {
public static void copy(File source, File dest, long count) throws IOException {
FileInputStream fis = null;
FileOutputStream fos = null;
try {
fis = new FileInputStream(source);
fos = new FileOutputStream(dest);
copy(fis, fos);
copy(fis, fos, count);
} finally {
if (fis != null) try {fis.close();} catch (Exception e) {}
if (fos != null) try {fos.close();} catch (Exception e) {}
@ -250,7 +275,7 @@ public final class serverFileUtils {
public static byte[] read(InputStream source) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
copy(source, baos);
copy(source, baos, -1);
baos.close();
return baos.toByteArray();
}
@ -309,7 +334,7 @@ public final class serverFileUtils {
}
public static void write(byte[] source, OutputStream dest) throws IOException {
copy(new ByteArrayInputStream(source), dest);
copy(new ByteArrayInputStream(source), dest, -1);
}
public static void write(byte[] source, File dest) throws IOException {