From b6c7b915827edf6b61e8c506e22326312f2762db Mon Sep 17 00:00:00 2001 From: theli Date: Wed, 20 Sep 2006 12:25:07 +0000 Subject: [PATCH] *) Parser now throws a ParserException instead of returning null on parsing errors (e.g. needed by snippet fetcher) *) better logging of parser failures *) simplified usage of plasmaparser through switchboard *) restructuring of crawler - crawler now returns an error message if it is used in sync mode (e.g. by snippet fetcher) *) snippet-fetcher: more verbose error messages *) serverByteBuffer.java: adding new function append(String,encoding) *) serverFileUtils.java: adding functions to copy only a given number of bytes between streams git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2641 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Bookmarks.java | 2 +- htroot/ViewFile.html | 6 +- htroot/ViewFile.java | 295 +++++++++--------- htroot/yacysearch.java | 18 +- .../plasma/crawler/AbstractCrawlWorker.java | 30 +- .../crawler/plasmaCrawlerException.java | 9 + .../anomic/plasma/parser/AbstractParser.java | 38 ++- .../anomic/plasma/parser/ParserException.java | 31 +- .../anomic/plasma/parser/bzip/bzipParser.java | 6 +- .../anomic/plasma/parser/doc/docParser.java | 8 +- .../anomic/plasma/parser/gzip/gzipParser.java | 6 +- .../parser/mimeType/mimeTypeParser.java | 23 +- .../anomic/plasma/parser/odt/odtParser.java | 28 +- .../anomic/plasma/parser/pdf/pdfParser.java | 26 +- .../anomic/plasma/parser/rpm/rpmParser.java | 22 +- .../anomic/plasma/parser/rss/rssParser.java | 13 +- .../anomic/plasma/parser/rtf/rtfParser.java | 6 +- .../anomic/plasma/parser/tar/tarParser.java | 31 +- .../anomic/plasma/parser/vcf/vcfParser.java | 10 +- .../anomic/plasma/parser/zip/zipParser.java | 55 ++-- .../de/anomic/plasma/plasmaCrawlLoader.java | 19 +- .../plasma/plasmaCrawlLoaderMessage.java | 9 + source/de/anomic/plasma/plasmaParser.java | 89 ++++-- .../de/anomic/plasma/plasmaSearchImages.java | 14 +- .../de/anomic/plasma/plasmaSnippetCache.java 
| 88 +++++- .../de/anomic/plasma/plasmaSwitchboard.java | 62 ++-- source/de/anomic/server/serverByteBuffer.java | 4 + source/de/anomic/server/serverFileUtils.java | 53 +++- 28 files changed, 637 insertions(+), 364 deletions(-) create mode 100644 source/de/anomic/plasma/crawler/plasmaCrawlerException.java diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index f25acaa4e..96b7b0d15 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -51,11 +51,11 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import de.anomic.net.URL; import de.anomic.data.bookmarksDB; import de.anomic.data.listManager; import de.anomic.data.bookmarksDB.Tag; import de.anomic.http.httpHeader; +import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSwitchboard; diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html index 87830d891..9cccb1fdc 100644 --- a/htroot/ViewFile.html +++ b/htroot/ViewFile.html @@ -53,9 +53,11 @@ Unable to find URL Entry in DB :: Invalid URL :: -Unable to download resource content. +Unable to download resource content.
+#[errorText]# :: -Unable to parse resource content. +Unable to parse resource content.
+#[errorText]# :: Unsupported protocol. #(/error)# diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index d28daa23a..ca76bf10a 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -57,6 +57,8 @@ import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.cache.IResourceInfo; +import de.anomic.plasma.crawler.plasmaCrawlerException; +import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.plasmaCrawlLURL.Entry; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -83,174 +85,185 @@ public class ViewFile { serverObjects prop = new serverObjects(); plasmaSwitchboard sb = (plasmaSwitchboard)env; - - - if (post.containsKey("words")) + if (post != null && post.containsKey("words")) try { prop.put("error_words",URLEncoder.encode((String) post.get("words"), "UTF-8")); } catch (UnsupportedEncodingException e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); + // ignore this. 
this should not occure } - if (post != null) { - // getting the url hash from which the content should be loaded - String urlHash = post.get("urlHash",""); - if (urlHash.equals("")) { - prop.put("error",1); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } - String viewMode = post.get("viewMode","sentences"); - - // getting the urlEntry that belongs to the url hash - Entry urlEntry = null; - urlEntry = sb.urlPool.loadedURL.load(urlHash, null); - if (urlEntry == null) { - prop.put("error",2); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } + // getting the url hash from which the content should be loaded + String urlHash = post.get("urlHash",""); + if (urlHash.equals("")) { + prop.put("error",1); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } - // gettin the url that belongs to the entry - URL url = urlEntry.url(); - if (url == null) { - prop.put("error",3); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } + String viewMode = post.get("viewMode","sentences"); + + // getting the urlEntry that belongs to the url hash + Entry urlEntry = null; + urlEntry = sb.urlPool.loadedURL.load(urlHash, null); + if (urlEntry == null) { + prop.put("error",2); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } + + // gettin the url that belongs to the entry + URL url = urlEntry.url(); + if (url == null) { + prop.put("error",3); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } + + // loading the resource content as byte array + byte[] resource = null; + IResourceInfo resInfo = null; + String resMime = null; + try { + // trying to load the resource body + resource = sb.cacheManager.loadResourceContent(url); + + // if the resource body was not cached we try to load it from web + if (resource == null) { + plasmaHTCache.Entry entry = null; + try { + entry = sb.snippetCache.loadResourceFromWeb(url, 5000); + } catch (plasmaCrawlerException e) { + prop.put("error",4); + prop.put("error_errorText",e.getMessage()); + 
prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } - // loading the resource content as byte array - byte[] resource = null; - IResourceInfo resInfo = null; - String resMime = null; - try { - // trying to load the resource body - resource = sb.cacheManager.loadResourceContent(url); + if (entry != null) { + resInfo = entry.getDocumentInfo(); + resource = sb.cacheManager.loadResourceContent(url); + } - // if the resource body was not cached we try to load it from web if (resource == null) { - plasmaHTCache.Entry entry = sb.snippetCache.loadResourceFromWeb(url, 5000); + prop.put("error",4); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } + } - if (entry != null) { - resInfo = entry.getDocumentInfo(); - resource = sb.cacheManager.loadResourceContent(url); + // try to load resource metadata + if (resInfo == null) { + + // try to load the metadata from cache + try { + resInfo = sb.cacheManager.loadResourceInfo(urlEntry.url()); + } catch (Exception e) { /* ignore this */} + + // if the metadata where not cached try to load it from web + if (resInfo == null) { + String protocol = url.getProtocol(); + if (!((protocol.equals("http") || protocol.equals("https")))) { + prop.put("error",6); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; } - if (resource == null) { + httpHeader responseHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig); + if (responseHeader == null) { prop.put("error",4); prop.put("viewMode",VIEW_MODE_NO_TEXT); return prop; } + resMime = responseHeader.mime(); } + } else { + resMime = resInfo.getMimeType(); + } + } catch (IOException e) { + prop.put("error",4); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } + if (viewMode.equals("plain")) { + String content = new String(resource); + content = content.replaceAll("<","<") + .replaceAll(">",">") + .replaceAll("\"",""") + .replaceAll("\n","
") + .replaceAll("\t","    "); - // try to load resource metadata - if (resInfo == null) { - - // try to load the metadata from cache - try { - resInfo = sb.cacheManager.loadResourceInfo(urlEntry.url()); - } catch (Exception e) { /* ignore this */} - - // if the metadata where not cached try to load it from web - if (resInfo == null) { - String protocol = url.getProtocol(); - if (!((protocol.equals("http") || protocol.equals("https")))) { - prop.put("error",6); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } - - httpHeader responseHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig); - if (responseHeader == null) { - prop.put("error",4); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } - resMime = responseHeader.mime(); - } - } else { - resMime = resInfo.getMimeType(); - } - } catch (IOException e) { - if (url == null) { - prop.put("error",4); - prop.put("viewMode",VIEW_MODE_NO_TEXT); - return prop; - } - } - if (viewMode.equals("plain")) { - String content = new String(resource); - content = content.replaceAll("<","<") - .replaceAll(">",">") - .replaceAll("\"",""") - .replaceAll("\n","
") - .replaceAll("\t","    "); - - prop.put("error",0); - prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT); - prop.put("viewMode_plainText",content); - } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) { - // parsing the resource content - plasmaParserDocument document = sb.snippetCache.parseDocument(url, resource,resInfo); + prop.put("error",0); + prop.put("viewMode",VIEW_MODE_AS_PLAIN_TEXT); + prop.put("viewMode_plainText",content); + } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("iframe")) { + // parsing the resource content + plasmaParserDocument document = null; + try { + document = sb.snippetCache.parseDocument(url, resource,resInfo); if (document == null) { prop.put("error",5); + prop.put("error_errorText","Unknown error"); prop.put("viewMode",VIEW_MODE_NO_TEXT); return prop; } - resMime = document.getMimeType(); - - if (viewMode.equals("parsed")) { - String content = new String(document.getText()); - content = wikiCode.replaceHTML(content); //added by Marc Nause - content = content.replaceAll("\n","
") - .replaceAll("\t","    "); - - prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT); - prop.put("viewMode_parsedText",content); - } else if (viewMode.equals("iframe")) { - prop.put("viewMode",VIEW_MODE_AS_IFRAME); - prop.put("viewMode_url",url.toString()); - } else { - prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES); - String[] sentences = document.getSentences(); - - boolean dark = true; - for (int i=0; i < sentences.length; i++) { - String currentSentence = wikiCode.replaceHTML(sentences[i]); - - // Search word highlighting - String words = post.get("words",null); - if (words != null) { - try { - words = URLDecoder.decode(words,"UTF-8"); - } catch (UnsupportedEncodingException e) {} - - String[] wordArray = words.substring(1,words.length()-1).split(","); - for (int j=0; j < wordArray.length; j++) { - String currentWord = wordArray[j].trim(); - currentSentence = currentSentence.replaceAll(currentWord, - "" + currentWord + ""); - } - } + } catch (ParserException e) { + prop.put("error",5); + prop.put("error_errorText",e.getMessage()); + prop.put("viewMode",VIEW_MODE_NO_TEXT); + return prop; + } + resMime = document.getMimeType(); - prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1)); - prop.put("viewMode_sentences_" + i + "_text",currentSentence); - prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 1 : 0) ); dark=!dark; + if (viewMode.equals("parsed")) { + String content = new String(document.getText()); + content = wikiCode.replaceHTML(content); //added by Marc Nause + content = content.replaceAll("\n","
") + .replaceAll("\t","    "); + + prop.put("viewMode",VIEW_MODE_AS_PARSED_TEXT); + prop.put("viewMode_parsedText",content); + } else if (viewMode.equals("iframe")) { + prop.put("viewMode",VIEW_MODE_AS_IFRAME); + prop.put("viewMode_url",url.toString()); + } else { + prop.put("viewMode",VIEW_MODE_AS_PARSED_SENTENCES); + String[] sentences = document.getSentences(); + + boolean dark = true; + for (int i=0; i < sentences.length; i++) { + String currentSentence = wikiCode.replaceHTML(sentences[i]); + + // Search word highlighting + String words = post.get("words",null); + if (words != null) { + try { + words = URLDecoder.decode(words,"UTF-8"); + } catch (UnsupportedEncodingException e) {} + + String[] wordArray = words.substring(1,words.length()-1).split(","); + for (int j=0; j < wordArray.length; j++) { + String currentWord = wordArray[j].trim(); + currentSentence = currentSentence.replaceAll(currentWord, + "" + currentWord + ""); + } } - prop.put("viewMode_sentences",sentences.length); - } - } - prop.put("error",0); - prop.put("error_url",url.toString()); - prop.put("error_hash",urlHash); - prop.put("error_wordCount",Integer.toString(urlEntry.wordCount())); - prop.put("error_desc",urlEntry.descr()); - prop.put("error_size",urlEntry.size()); - prop.put("error_mimeType",resMime); - } + prop.put("viewMode_sentences_" + i + "_nr",Integer.toString(i+1)); + prop.put("viewMode_sentences_" + i + "_text",currentSentence); + prop.put("viewMode_sentences_" + i + "_dark",((dark) ? 
1 : 0) ); dark=!dark; + } + prop.put("viewMode_sentences",sentences.length); + + } + } + prop.put("error",0); + prop.put("error_url",url.toString()); + prop.put("error_hash",urlHash); + prop.put("error_wordCount",Integer.toString(urlEntry.wordCount())); + prop.put("error_desc",urlEntry.descr()); + prop.put("error_size",urlEntry.size()); + prop.put("error_mimeType",resMime); return prop; } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 3d56f9eb4..72adb9831 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -56,6 +56,7 @@ import de.anomic.htmlFilter.htmlFilterImageEntry; import de.anomic.http.httpHeader; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; +import de.anomic.net.URL; import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.plasmaSearchImages; @@ -64,7 +65,6 @@ import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProfile; import de.anomic.plasma.plasmaSearchTimingProfile; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.net.URL; import de.anomic.server.serverCore; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; @@ -192,13 +192,15 @@ public class yacysearch { plasmaCrawlLURL.Entry urlentry = sb.urlPool.loadedURL.load(recommendHash, null); if (urlentry != null) { plasmaParserDocument document = sb.snippetCache.retrieveDocument(urlentry.url(), true); - // create a news message - HashMap map = new HashMap(); - map.put("url", urlentry.url().toNormalform().replace(',', '|')); - map.put("title", urlentry.descr().replace(',', ' ')); - map.put("description", ((document == null) ? urlentry.descr() : document.getMainLongTitle()).replace(',', ' ')); - map.put("tags", ((document == null) ? 
"" : document.getKeywords(' '))); - yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map)); + if (document != null) { + // create a news message + HashMap map = new HashMap(); + map.put("url", urlentry.url().toNormalform().replace(',', '|')); + map.put("title", urlentry.descr().replace(',', ' ')); + map.put("description", ((document == null) ? urlentry.descr() : document.getMainLongTitle()).replace(',', ' ')); + map.put("tags", ((document == null) ? "" : document.getKeywords(' '))); + yacyCore.newsPool.publishMyNews(new yacyNewsRecord("stippadd", map)); + } } } diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index 7889df481..2df4f4d4b 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -93,6 +93,8 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW protected plasmaCrawlProfile.entry profile; protected boolean acceptAllContent; + protected String errorMessage; + /** * The crawler thread pool */ @@ -186,6 +188,8 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW } public void execute() { + + plasmaHTCache.Entry loadedResource = null; try { // setting threadname this.setName(plasmaCrawlWorker.threadBaseName + "_" + this.url); @@ -194,15 +198,23 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW init(); // loading resource - plasmaHTCache.Entry resource = load(); + loadedResource = load(); + } catch (IOException e) { + //throw e; + } finally { + // setting the error message (if available) + if (this.errorMessage != null) { + this.theMsg.setError(this.errorMessage); + } // store a reference to the result in the message object // this is e.g. 
needed by the snippet fetcher - this.theMsg.setResult(resource); - - } catch (IOException e) { - //throw e; - } finally { + // + // Note: this is always called, even on empty results. + // Otherwise the caller will block forever + this.theMsg.setResult(loadedResource); + + // signal that this worker thread has finished the job this.done = true; } } @@ -256,9 +268,13 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.startdate = 0; this.profile = null; this.acceptAllContent = false; + this.errorMessage = null; } - protected void addURLtoErrorDB(String failreason) { + protected void addURLtoErrorDB(String failreason) { + // remember error message + this.errorMessage = failreason; + // convert the referrer URL into a hash value String referrerHash = (this.refererURLString==null)?null:indexURL.urlHash(this.refererURLString); diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlerException.java b/source/de/anomic/plasma/crawler/plasmaCrawlerException.java new file mode 100644 index 000000000..165dd4e78 --- /dev/null +++ b/source/de/anomic/plasma/crawler/plasmaCrawlerException.java @@ -0,0 +1,9 @@ +package de.anomic.plasma.crawler; + +import java.io.IOException; + +public class plasmaCrawlerException extends IOException { + public plasmaCrawlerException(String errorMsg) { + super(errorMsg); + } +} diff --git a/source/de/anomic/plasma/parser/AbstractParser.java b/source/de/anomic/plasma/parser/AbstractParser.java index c69c60496..9507e5ca5 100644 --- a/source/de/anomic/plasma/parser/AbstractParser.java +++ b/source/de/anomic/plasma/parser/AbstractParser.java @@ -49,6 +49,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.io.InputStream; import de.anomic.net.URL; @@ -93,12 +94,35 @@ public abstract class AbstractParser implements Parser{ this.libxDependencies = libxDependencies; } + /** + * Check if the parser 
was interrupted. + * @throws InterruptedException if the parser was interrupted + */ public static final void checkInterruption() throws InterruptedException { Thread currentThread = Thread.currentThread(); if ((currentThread instanceof serverThread) && ((serverThread)currentThread).shutdownInProgress()) throw new InterruptedException("Shutdown in progress ..."); if (currentThread.isInterrupted()) throw new InterruptedException("Shutdown in progress ..."); } + public final File createTempFile(String name) throws IOException { + String parserClassName = this.getClass().getName(); + int idx = parserClassName.lastIndexOf("."); + if (idx != -1) { + parserClassName = parserClassName.substring(idx+1); + } + + // getting the file extension + idx = name.lastIndexOf("/"); + String fileName = (idx != -1) ? name.substring(idx+1) : name; + + idx = fileName.lastIndexOf("."); + String fileExt = (idx > -1) ? fileName.substring(idx+1) : ""; + + // creates the temp file + File tempFile = File.createTempFile(parserClassName + "_" + ((idx>-1)?fileName.substring(0,idx):fileName), (fileExt.length()>0)?"."+fileExt:fileExt); + return tempFile; + } + /** * Parsing a document available as byte array. 
* @param location the origin of the document @@ -119,14 +143,17 @@ public abstract class AbstractParser implements Parser{ ) throws ParserException, InterruptedException { ByteArrayInputStream contentInputStream = null; try { + // convert the byte array into a stream contentInputStream = new ByteArrayInputStream(source); + + // parse the stream return this.parse(location,mimeType,charset,contentInputStream); } finally { if (contentInputStream != null) { try { contentInputStream.close(); contentInputStream = null; - } catch (Exception e){} + } catch (Exception e){ /* ignore this */} } } } @@ -151,12 +178,15 @@ public abstract class AbstractParser implements Parser{ ) throws ParserException, InterruptedException { BufferedInputStream contentInputStream = null; try { + // create a stream from the file contentInputStream = new BufferedInputStream(new FileInputStream(sourceFile)); + + // parse the stream return this.parse(location, mimeType, charset, contentInputStream); } catch (FileNotFoundException e) { - throw new ParserException(e.getMessage()); + throw new ParserException("Unexpected error while parsing file. 
" + e.getMessage(),location); } finally { - if (contentInputStream != null) try{contentInputStream.close();}catch(Exception e){} + if (contentInputStream != null) try{contentInputStream.close();}catch(Exception e){/* ignore this */} } } @@ -201,6 +231,6 @@ public abstract class AbstractParser implements Parser{ * Return the name of the parser */ public String getName() { - return parserName; + return this.parserName; } } diff --git a/source/de/anomic/plasma/parser/ParserException.java b/source/de/anomic/plasma/parser/ParserException.java index cdb730ec6..c05d9a484 100644 --- a/source/de/anomic/plasma/parser/ParserException.java +++ b/source/de/anomic/plasma/parser/ParserException.java @@ -44,24 +44,45 @@ package de.anomic.plasma.parser; +import de.anomic.net.URL; +import de.anomic.plasma.plasmaCrawlEURL; + public class ParserException extends Exception { - + private String errorCode = null; + private URL url = null; + private static final long serialVersionUID = 1L; public ParserException() { super(); } - public ParserException(String message) { + public ParserException(String message, URL url) { + this(message,url,plasmaCrawlEURL.DENIED_PARSER_ERROR); + } + + public ParserException(String message, URL url, String errorCode) { super(message); + this.errorCode = errorCode; + this.url = url; } - public ParserException(String message, Throwable cause) { + public ParserException(String message, URL url, Throwable cause) { + this(message,url,cause,plasmaCrawlEURL.DENIED_PARSER_ERROR); + } + + public ParserException(String message, URL url, Throwable cause, String errorCode) { super(message, cause); + this.errorCode = errorCode; + this.url = url; } - public ParserException(Throwable cause) { - super(cause); + public String getErrorCode() { + return this.errorCode; + } + + public URL getURL() { + return this.url; } } diff --git a/source/de/anomic/plasma/parser/bzip/bzipParser.java b/source/de/anomic/plasma/parser/bzip/bzipParser.java index 8b2020c81..53aa52e40 100644 --- 
a/source/de/anomic/plasma/parser/bzip/bzipParser.java +++ b/source/de/anomic/plasma/parser/bzip/bzipParser.java @@ -80,7 +80,7 @@ public class bzipParser extends AbstractParser implements Parser { public bzipParser() { super(LIBX_DEPENDENCIES); - parserName = "Bzip 2 UNIX Compressed File Parser"; + this.parserName = "Bzip 2 UNIX Compressed File Parser"; } public Hashtable getSupportedMimeTypes() { @@ -129,7 +129,9 @@ public class bzipParser extends AbstractParser implements Parser { return theParser.parseSource(location,null,null,tempFile); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the gzip content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing bzip file. " + e.getMessage(),location); } finally { if (tempFile != null) tempFile.delete(); } diff --git a/source/de/anomic/plasma/parser/doc/docParser.java b/source/de/anomic/plasma/parser/doc/docParser.java index 46aa1196a..2a89dbfee 100644 --- a/source/de/anomic/plasma/parser/doc/docParser.java +++ b/source/de/anomic/plasma/parser/doc/docParser.java @@ -75,7 +75,7 @@ implements Parser { public docParser() { super(LIBX_DEPENDENCIES); - parserName = "Word Document Parser"; + this.parserName = "Word Document Parser"; } public plasmaParserDocument parse(URL location, String mimeType, String charset, @@ -99,14 +99,16 @@ implements Parser { null, null, null, - contents.getBytes(), + contents.getBytes("UTF-8"), null, null); return theDoc; } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the doc content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing doc file. 
" + e.getMessage(),location); } } diff --git a/source/de/anomic/plasma/parser/gzip/gzipParser.java b/source/de/anomic/plasma/parser/gzip/gzipParser.java index abc58e26e..389795372 100644 --- a/source/de/anomic/plasma/parser/gzip/gzipParser.java +++ b/source/de/anomic/plasma/parser/gzip/gzipParser.java @@ -76,7 +76,7 @@ public class gzipParser extends AbstractParser implements Parser { public gzipParser() { super(LIBX_DEPENDENCIES); - parserName = "GNU Zip Compressed Archive Parser"; + this.parserName = "GNU Zip Compressed Archive Parser"; } public Hashtable getSupportedMimeTypes() { @@ -113,7 +113,9 @@ public class gzipParser extends AbstractParser implements Parser { return theParser.parseSource(location,null,null,tempFile); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the gzip content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing gzip file. 
" + e.getMessage(),location); } finally { if (tempFile != null) tempFile.delete(); } diff --git a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java index f2b86124f..6d5eabc33 100644 --- a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java +++ b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java @@ -44,6 +44,7 @@ package de.anomic.plasma.parser.mimeType; import java.io.File; +import java.io.IOException; import java.io.InputStream; import de.anomic.net.URL; import java.util.Collection; @@ -99,7 +100,7 @@ implements Parser { public mimeTypeParser() { super(LIBX_DEPENDENCIES); - parserName = "MimeType Parser"; + this.parserName = "MimeType Parser"; } public String getMimeType (File sourceFile) { @@ -142,8 +143,8 @@ implements Parser { threadLoopDetection.put(Thread.currentThread(),new Integer(loopDepth.intValue()+1)); // deactivating the logging for jMimeMagic - Logger theLogger = Logger.getLogger("net.sf.jmimemagic"); - theLogger.setLevel(Level.OFF); + Logger jmimeMagicLogger = Logger.getLogger("net.sf.jmimemagic"); + jmimeMagicLogger.setLevel(Level.OFF); Magic theMagic = new Magic(); MagicMatch match = theMagic.getMagicMatch(sourceFile); @@ -160,8 +161,8 @@ implements Parser { } // to avoid loops we have to test if the mimetype has changed ... 
- if (this.getSupportedMimeTypes().containsKey(mimeType)) return null; - if (orgMimeType.equals(mimeType)) return null; + if (this.getSupportedMimeTypes().containsKey(mimeType)) throw new ParserException("Unable to detect mimetype of resource.",location); + if (orgMimeType.equals(mimeType)) throw new ParserException("Unable to detect mimetype of resource.",location); // check for interruption checkInterruption(); @@ -170,11 +171,13 @@ implements Parser { plasmaParser theParser = new plasmaParser(); return theParser.parseSource(location,mimeType,charset,sourceFile); } - return null; + throw new ParserException("Unable to detect mimetype of resource.",location); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - return null; + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while detect mimetype of resource. " + e.getMessage(),location); } finally { Integer loopDepth = (Integer) threadLoopDetection.get(Thread.currentThread()); if (loopDepth.intValue() <= 1) { @@ -186,14 +189,14 @@ implements Parser { } public plasmaParserDocument parse(URL location, String mimeType,String charset, - InputStream source) throws ParserException { + InputStream source) throws ParserException, InterruptedException { File dstFile = null; try { dstFile = File.createTempFile("mimeTypeParser",".tmp"); serverFileUtils.copy(source,dstFile); return parse(location,mimeType,charset,dstFile); - } catch (Exception e) { - return null; + } catch (IOException e) { + throw new ParserException("Unexpected error while detect mimetype of resource. 
" + e.getMessage(),location); } finally { if (dstFile != null) {dstFile.delete();} } diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java index f8a9a10be..a2b1b8cbd 100644 --- a/source/de/anomic/plasma/parser/odt/odtParser.java +++ b/source/de/anomic/plasma/parser/odt/odtParser.java @@ -84,7 +84,7 @@ public class odtParser extends AbstractParser implements Parser { public odtParser() { super(LIBX_DEPENDENCIES); - parserName = "OASIS OpenDocument V2 Text Document Parser"; + this.parserName = "OASIS OpenDocument V2 Text Document Parser"; } public Hashtable getSupportedMimeTypes() { @@ -96,7 +96,7 @@ public class odtParser extends AbstractParser implements Parser { try { byte[] docContent = null; String docDescription = null; - String docKeywords = null; + String docKeywordStr = null; String docShortTitle = null; String docLongTitle = null; @@ -125,7 +125,7 @@ public class odtParser extends AbstractParser implements Parser { ODFMetaFileAnalyzer metaAnalyzer = new ODFMetaFileAnalyzer(); OpenDocumentMetadata metaData = metaAnalyzer.analyzeMetaData(zipFileEntryStream); docDescription = metaData.getDescription(); - docKeywords = metaData.getKeyword(); + docKeywordStr = metaData.getKeyword(); docShortTitle = metaData.getTitle(); docLongTitle = metaData.getSubject(); @@ -149,11 +149,16 @@ public class odtParser extends AbstractParser implements Parser { } } + // split the keywords + String[] docKeywords = null; + if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,"); + + // create the parser document return new plasmaParserDocument( location, mimeType, "UTF-8", - docKeywords.split(" |,"), + docKeywords, docShortTitle, docLongTitle, null, @@ -163,13 +168,13 @@ public class odtParser extends AbstractParser implements Parser { null); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the odt content. 
" + e.getMessage()); - } catch (Error e) { - throw new ParserException("Unable to parse the odt content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing odt file. " + e.getMessage(),location); } } - public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException { + public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { File dest = null; try { // creating a tempfile @@ -182,9 +187,12 @@ public class odtParser extends AbstractParser implements Parser { // parsing the content return parse(location, mimeType, charset, dest); } catch (Exception e) { - throw new ParserException("Unable to parse the odt document. " + e.getMessage()); + if (e instanceof InterruptedException) throw (InterruptedException) e; + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing odt file. 
" + e.getMessage(),location); } finally { - if (dest != null) try { dest.delete(); } catch (Exception e){} + if (dest != null) try { dest.delete(); } catch (Exception e){/* ignore this */} } } diff --git a/source/de/anomic/plasma/parser/pdf/pdfParser.java b/source/de/anomic/plasma/parser/pdf/pdfParser.java index 298a87f41..1b67fceb4 100644 --- a/source/de/anomic/plasma/parser/pdf/pdfParser.java +++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java @@ -78,7 +78,7 @@ public class pdfParser extends AbstractParser implements Parser { public pdfParser() { super(LIBX_DEPENDENCIES); - parserName = "Acrobat Portable Document Parser"; + this.parserName = "Acrobat Portable Document Parser"; } public Hashtable getSupportedMimeTypes() { @@ -98,7 +98,7 @@ public class pdfParser extends AbstractParser implements Parser { // Logger theLogger = Logger.getLogger("org.pdfbox"); // theLogger.setLevel(Level.INFO); - String docTitle = null, docSubject = null, /*docAuthor = null,*/ docKeyWords = null; + String docTitle = null, docSubject = null, /*docAuthor = null,*/ docKeywordStr = null; // check for interruption checkInterruption(); @@ -120,7 +120,7 @@ public class pdfParser extends AbstractParser implements Parser { docTitle = theDocInfo.getTitle(); docSubject = theDocInfo.getSubject(); //docAuthor = theDocInfo.getAuthor(); - docKeyWords = theDocInfo.getKeywords(); + docKeywordStr = theDocInfo.getKeywords(); } serverByteBuffer out = new serverByteBuffer(); @@ -142,18 +142,14 @@ public class pdfParser extends AbstractParser implements Parser { replaceAll("\t"," "); } - /* - * public document(URL location, String mimeType, - String keywords, String shortTitle, String longTitle, - String[] sections, String abstrct, - byte[] text, Map anchors, Map images) { - * - */ + String[] docKeywords = null; + if (docKeywordStr != null) docKeywords = docKeywordStr.split(" |,"); + plasmaParserDocument theDoc = new plasmaParserDocument( location, mimeType, "UTF-8", - docKeyWords.split(" |,"), + 
docKeywords, docSubject, docTitle, null, @@ -166,10 +162,12 @@ public class pdfParser extends AbstractParser implements Parser { } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the pdf content. " + e.getMessage(),e); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing pdf file. " + e.getMessage(),location); } finally { - if (theDocument != null) try { theDocument.close(); } catch (Exception e) {} - if (writer != null) try { writer.close(); } catch (Exception e) {} + if (theDocument != null) try { theDocument.close(); } catch (Exception e) {/* ignore this */} + if (writer != null) try { writer.close(); } catch (Exception e) {/* ignore this */} Thread.currentThread().setPriority(Thread.NORM_PRIORITY); } } diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index 7e117f4f5..eef4ca2fb 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -84,7 +84,7 @@ public class rpmParser extends AbstractParser implements Parser { public rpmParser() { super(LIBX_DEPENDENCIES); - parserName = "rpm Parser"; + this.parserName = "rpm Parser"; } public Hashtable getSupportedMimeTypes() { @@ -126,12 +126,12 @@ public class rpmParser extends AbstractParser implements Parser { // getting the next tag DataTypeIf tag = rpmFile.getTag(headerNames[i]); - if (tag != null) { - content.append(headerNames[i]) - .append(": ") - .append(tag.toString()) - .append("\n"); - } + if (tag == null) continue; + + content.append(headerNames[i]) + .append(": ") + .append(tag.toString()) + .append("\n"); if (headerNames[i].equals("N")) name = tag.toString(); else if (headerNames[i].equals("SUMMARY")) summary = tag.toString(); @@ -153,16 +153,18 @@ public class rpmParser extends AbstractParser implements Parser { summary, 
null, description, - content.toString().getBytes(), + content.toString().getBytes("UTF-8"), anchors, null); return theDoc; } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the rpm file. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing rpm file. " + e.getMessage(),location); } finally { - if (rpmFile != null) try { rpmFile.close(); } catch (Exception e) {} + if (rpmFile != null) try { rpmFile.close(); } catch (Exception e) {/* ignore this */} } } diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index 299e3f865..41cf8573b 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -98,7 +98,7 @@ public class rssParser extends AbstractParser implements Parser { public rssParser() { super(LIBX_DEPENDENCIES); - parserName = "Rich Site Summary/Atom Feed Parser"; + this.parserName = "Rich Site Summary/Atom Feed Parser"; } public plasmaParserDocument parse(URL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException { @@ -149,7 +149,7 @@ public class rssParser extends AbstractParser implements Parser { anchors.put(itemURL.toString(),itemTitle); if ((text.length() != 0) && (text.byteAt(text.length() - 1) != 32)) text.append((byte) 32); - text.append(new serverCharBuffer(htmlFilterAbstractScraper.stripAll(new serverCharBuffer(itemDescr.toCharArray()))).trim()).append(' '); // TODO: this does not work for utf-8 + text.append(new serverCharBuffer(htmlFilterAbstractScraper.stripAll(new serverCharBuffer(itemDescr.toCharArray()))).trim().toString()).append(' '); String itemContent = item.getElementValue("content"); if ((itemContent != null) && (itemContent.length() > 0)) { @@ -183,11 +183,6 @@ public class rssParser 
extends AbstractParser implements Parser { } } - /* (URL location, String mimeType, - String keywords, String shortTitle, String longTitle, - String[] sections, String abstrct, - byte[] text, Map anchors, Map images) - */ plasmaParserDocument theDoc = new plasmaParserDocument( location, mimeType, @@ -205,7 +200,9 @@ public class rssParser extends AbstractParser implements Parser { } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the rss file. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing rss file." + e.getMessage(),location); } } diff --git a/source/de/anomic/plasma/parser/rtf/rtfParser.java b/source/de/anomic/plasma/parser/rtf/rtfParser.java index fdef82b99..4fa5d3028 100644 --- a/source/de/anomic/plasma/parser/rtf/rtfParser.java +++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java @@ -77,7 +77,7 @@ implements Parser { public rtfParser() { super(LIBX_DEPENDENCIES); - parserName = "Rich Text Format Parser"; + this.parserName = "Rich Text Format Parser"; } public plasmaParserDocument parse(URL location, String mimeType, String charset, @@ -113,7 +113,9 @@ implements Parser { } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the rdf content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing rtf resource." 
+ e.getMessage(),location); } } diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java index ba30acc91..c70c4e26c 100644 --- a/source/de/anomic/plasma/parser/tar/tarParser.java +++ b/source/de/anomic/plasma/parser/tar/tarParser.java @@ -87,7 +87,7 @@ public class tarParser extends AbstractParser implements Parser { public tarParser() { super(LIBX_DEPENDENCIES); - parserName = "Tape Archive File Parser"; + this.parserName = "Tape Archive File Parser"; } public Hashtable getSupportedMimeTypes() { @@ -128,12 +128,11 @@ public class tarParser extends AbstractParser implements Parser { // skip directories if (entry.isDirectory()) continue; - // Get the entry name - int idx = -1; + // Get the short entry name String entryName = entry.getName(); - idx = entryName.lastIndexOf("/"); - if (idx != -1) entryName = entryName.substring(idx+1); - idx = entryName.lastIndexOf("."); + + // getting the entry file extension + int idx = entryName.lastIndexOf("."); String entryExt = (idx > -1) ? 
entryName.substring(idx+1) : ""; // trying to determine the mimeType per file extension @@ -143,19 +142,21 @@ public class tarParser extends AbstractParser implements Parser { plasmaParserDocument theDoc = null; File tempFile = null; try { - byte[] buf = new byte[(int) entry.getSize()]; - /*int bytesRead =*/ tin.read(buf); - - tempFile = File.createTempFile("tarParser_" + ((idx>-1)?entryName.substring(0,idx):entryName), (entryExt.length()>0)?"."+entryExt:entryExt); - serverFileUtils.write(buf, tempFile); + // create the temp file + tempFile = createTempFile(entryName); + + // copy the data into the file + serverFileUtils.copy(tin,tempFile,entry.getSize()); // check for interruption checkInterruption(); // parsing the content - theDoc = theParser.parseSource(new URL(tempFile),entryMime,null,tempFile); + theDoc = theParser.parseSource(new URL(location,"#" + entryName),entryMime,null,tempFile); + } catch (ParserException e) { + this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getErrorCode()); } finally { - if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){} + if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){/* ignore this */} } if (theDoc == null) continue; @@ -200,7 +201,9 @@ public class tarParser extends AbstractParser implements Parser { docImages); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the zip content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing tar resource. 
" + e.getMessage(),location); } } diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java index 1dc963e95..f92835236 100644 --- a/source/de/anomic/plasma/parser/vcf/vcfParser.java +++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java @@ -215,7 +215,7 @@ public class vcfParser extends AbstractParser implements Parser { URL newURL = new URL(value); anchors.put(newURL.toString(),newURL.toString()); //parsedData.put(key,value); - } catch (MalformedURLException ex) {} + } catch (MalformedURLException ex) {/* ignore this */} } else if ( !key.equalsIgnoreCase("BEGIN") && !key.equalsIgnoreCase("END") && @@ -255,12 +255,10 @@ public class vcfParser extends AbstractParser implements Parser { return theDoc; } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; + if (e instanceof ParserException) throw (ParserException) e; - String errorMsg = "Unable to parse the vcard content. " + e.getMessage(); - this.theLogger.logSevere(errorMsg); - throw new ParserException(errorMsg); - } finally { - } + throw new ParserException("Unexpected error while parsing vcf resource. 
" + e.getMessage(),location); + } } public void reset() { diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java index c6d07a66e..7b55085d8 100644 --- a/source/de/anomic/plasma/parser/zip/zipParser.java +++ b/source/de/anomic/plasma/parser/zip/zipParser.java @@ -43,9 +43,8 @@ package de.anomic.plasma.parser.zip; -import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.InputStream; -import de.anomic.net.URL; import java.util.Arrays; import java.util.HashMap; import java.util.Hashtable; @@ -55,12 +54,14 @@ import java.util.TreeSet; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import de.anomic.net.URL; import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParserDocument; import de.anomic.plasma.parser.AbstractParser; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; +import de.anomic.server.serverFileUtils; public class zipParser extends AbstractParser implements Parser { @@ -84,7 +85,7 @@ public class zipParser extends AbstractParser implements Parser { public zipParser() { super(LIBX_DEPENDENCIES); - parserName = "Compressed Archive File Parser"; + this.parserName = "Compressed Archive File Parser"; } public Hashtable getSupportedMimeTypes() { @@ -110,29 +111,39 @@ public class zipParser extends AbstractParser implements Parser { ZipEntry entry; ZipInputStream zippedContent = new ZipInputStream(source); while ((entry = zippedContent.getNextEntry()) !=null) { + // check for interruption + checkInterruption(); + // skip directories if (entry.isDirectory()) continue; // Get the entry name String entryName = entry.getName(); int idx = entryName.lastIndexOf("."); - String entryExt = (idx > -1) ? 
entryName.substring(idx+1) : null; - - // trying to determine the mimeType per file extension - String entryMime = plasmaParser.getMimeTypeByFileExt(entryExt); - // getting the entry content - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - byte[] buf = new byte[(int) entry.getSize()]; - /*int bytesRead =*/ zippedContent.read(buf); - bos.write(buf); - byte[] ut = bos.toByteArray(); + // getting the file extension + String entryExt = (idx > -1) ? entryName.substring(idx+1) : ""; - // check for interruption - checkInterruption(); + // trying to determine the mimeType per file extension + String entryMime = plasmaParser.getMimeTypeByFileExt(entryExt); // parsing the content - plasmaParserDocument theDoc = theParser.parseSource(location,entryMime,null, ut); + plasmaParserDocument theDoc = null; + File tempFile = null; + try { + // create the temp file + tempFile = createTempFile(entryName); + + // copy the data into the file + serverFileUtils.copy(zippedContent,tempFile,entry.getSize()); + + // parsing the zip file entry + theDoc = theParser.parseSource(new URL(location,"#" + entryName),entryMime,null, tempFile); + } catch (ParserException e) { + this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. 
" + e.getErrorCode()); + } finally { + if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){/* ignore this */} + } if (theDoc == null) continue; // merging all documents together @@ -157,11 +168,7 @@ public class zipParser extends AbstractParser implements Parser { docImages.addAll(theDoc.getImages()); } - /* (URL location, String mimeType, - String keywords, String shortTitle, String longTitle, - String[] sections, String abstrct, - byte[] text, Map anchors, Map images) - */ + return new plasmaParserDocument( location, mimeType, @@ -176,9 +183,9 @@ public class zipParser extends AbstractParser implements Parser { docImages); } catch (Exception e) { if (e instanceof InterruptedException) throw (InterruptedException) e; - throw new ParserException("Unable to parse the zip content. " + e.getMessage()); - } catch (Error e) { - throw new ParserException("Unable to parse the zip content. " + e.getMessage()); + if (e instanceof ParserException) throw (ParserException) e; + + throw new ParserException("Unexpected error while parsing zip resource. " + e.getMessage(),location); } } diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index d8b8fdca7..e349da8bf 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -52,6 +52,7 @@ import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.net.URL; import de.anomic.plasma.crawler.plasmaCrawlWorker; +import de.anomic.plasma.crawler.plasmaCrawlerException; import de.anomic.plasma.crawler.plasmaCrawlerFactory; import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue; import de.anomic.plasma.crawler.plasmaCrawlerPool; @@ -83,7 +84,7 @@ public final class plasmaCrawlLoader extends Thread { // supported protocols // TODO: change this, e.g. 
by loading settings from file - this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https" /* ,"ftp" */})); + this.supportedProtocols = new HashSet(Arrays.asList(new String[]{"http","https"/* ,"ftp" */})); // configuring the crawler messagequeue this.theQueue = new plasmaCrawlerMsgQueue(); @@ -99,6 +100,8 @@ public final class plasmaCrawlLoader extends Thread { // The maximum number of idle connections connections in the pool // 0 = no limit. this.crawlerPoolConfig.maxIdle = Integer.parseInt(switchboard.getConfig("crawler.MaxIdleThreads","7")); + + // minIdle configuration not possible for keyedObjectPools //this.crawlerPoolConfig.minIdle = Integer.parseInt(switchboard.getConfig("crawler.MinIdleThreads","5")); // block undefinitely @@ -216,7 +219,7 @@ public final class plasmaCrawlLoader extends Thread { int depth, plasmaCrawlProfile.entry profile, int timeout - ) { + ) throws plasmaCrawlerException { plasmaHTCache.Entry result = null; if (!this.crawlwerPool.isClosed) { @@ -241,11 +244,17 @@ public final class plasmaCrawlLoader extends Thread { this.execute(theMsg); // wait for the crawl job result - result = theMsg.waitForResult(); - + result = theMsg.waitForResult(); } catch (Exception e) { - this.log.logSevere("plasmaCrawlLoader.loadSync", e); + this.log.logSevere("plasmaCrawlLoader.loadSync: Unexpected error", e); + throw new plasmaCrawlerException("Unexpected error: " + e.getMessage()); } + + // check if an error has occured + if (result == null) { + String errorMsg = theMsg.getError(); + throw new plasmaCrawlerException(errorMsg); + } } // return the result diff --git a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java index d79674b19..b3d678c67 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java +++ b/source/de/anomic/plasma/plasmaCrawlLoaderMessage.java @@ -59,6 +59,7 @@ public final class plasmaCrawlLoaderMessage { private serverSemaphore resultSync = null; 
private plasmaHTCache.Entry result; + private String errorMessage; // loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) { public plasmaCrawlLoaderMessage( @@ -86,6 +87,14 @@ public final class plasmaCrawlLoaderMessage { this.result = null; } + public void setError(String errorMessage) { + this.errorMessage = errorMessage; + } + + public String getError() { + return this.errorMessage; + } + public void setResult(plasmaHTCache.Entry theResult) { // store the result this.result = theResult; diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 9c22a93ca..0e5933193 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -73,12 +73,14 @@ import de.anomic.htmlFilter.htmlFilterInputStream; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.http.httpHeader; import de.anomic.http.httpc; +import de.anomic.index.indexURL; import de.anomic.net.URL; import de.anomic.plasma.parser.Parser; import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.parser.ParserInfo; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; +import de.anomic.tools.bitfield; public final class plasmaParser { public static final String PARSER_MODE_PROXY = "PROXY"; @@ -407,7 +409,7 @@ public final class plasmaParser { if (neededLibx != null) { for (int libxId=0; libxId < neededLibx.length; libxId++) { if (javaClassPath.indexOf(neededLibx[libxId]) == -1) { - throw new ParserException("Missing dependency detected: '" + neededLibx[libxId] + "'."); + throw new Exception("Missing dependency detected: '" + neededLibx[libxId] + "'."); } neededLibxBuf.append(neededLibx[libxId]) .append(","); @@ -464,42 +466,67 @@ public final class plasmaParser { // closing the parser object pool try { theParserPool.close(); - } catch (Exception e) { } + } catch (Exception e) {/* ignore this */} } - public 
plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] source) throws InterruptedException { + public plasmaParserDocument parseSource(URL location, String mimeType, String charset, byte[] source) + throws InterruptedException, ParserException { File tempFile = null; try { + // creating a temp file to store the byte array tempFile = File.createTempFile("parseSource", ".tmp"); serverFileUtils.write(source, tempFile); + + // parsing the temp file return parseSource(location, mimeType, charset, tempFile); + } catch (Exception e) { + // Interrupted- and Parser-Exceptions should pass through if (e instanceof InterruptedException) throw (InterruptedException) e; - serverLog.logSevere("PARSER", "parseSource1: " + e.getMessage(), e); - return null; + if (e instanceof ParserException) throw (ParserException) e; + + // log unexpected error + this.theLogger.logSevere("Unexpected exception in parseSource1: " + e.getMessage(), e); + throw new ParserException("Unexpected exception while parsing " + location,location, e); } finally { - if (tempFile != null) try { tempFile.delete(); } catch (Exception ex){} + if (tempFile != null) try { tempFile.delete(); } catch (Exception ex){/* ignore this */} } } - public plasmaParserDocument parseSource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException { + public plasmaParserDocument parseSource(URL location, String theMimeType, String theDocumentCharset, File sourceFile) + throws InterruptedException, ParserException { Parser theParser = null; + String mimeType = null; try { // getting the mimetype of the document - mimeType = getRealMimeType(mimeType); + mimeType = getRealMimeType(theMimeType); // getting the file extension of the document String fileExt = getFileExt(location); // getting the charset of the document - if (documentCharset == null) - // TODO: do a charset detection here .... 
- documentCharset = "ISO-8859-1"; + // TODO: do a charset detection here .... + String documentCharset = (theDocumentCharset == null) ? "ISO-8859-1" : theDocumentCharset; + + // testing if parsing is supported for this resource + if (!plasmaParser.supportedContent(location,mimeType)) { + String errorMsg = "No parser available to parse mimetype"; + this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg); + throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT); + } + + // testing if the resource is not empty + if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) { + String errorMsg = "No resource content available."; + this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg); + throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_NOT_PARSEABLE_NO_CONTENT); + } + if (this.theLogger.isFine()) - this.theLogger.logFine("Parsing " + location + " with mimeType '" + mimeType + + this.theLogger.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'."); /* @@ -555,26 +582,43 @@ public final class plasmaParser { theParser = this.getParser(mimeType); // if a parser was found we use it ... + plasmaParserDocument doc = null; if (theParser != null) { - return theParser.parse(location, mimeType,documentCharset,sourceFile); + doc = theParser.parse(location, mimeType,documentCharset,sourceFile); } else if (realtimeParsableMimeTypesContains(mimeType)) { - return parseHtml(location, mimeType, documentCharset, sourceFile); + doc = parseHtml(location, mimeType, documentCharset, sourceFile); } else { - serverLog.logWarning("PARSER", "parseSource2: wrong mime type"); - return null; + String errorMsg = "No parser available to parse mimetype"; + this.theLogger.logInfo("Unable to parse '" + location + "'. 
" + errorMsg); + throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT); + } + + // check result + if (doc == null) { + String errorMsg = "Unexpected error. Parser returned null."; + this.theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg); + throw new ParserException(errorMsg,location); } + return doc; + } catch (Exception e) { + // Interrupted- and Parser-Exceptions should pass through if (e instanceof InterruptedException) throw (InterruptedException) e; - serverLog.logSevere("PARSER", "parseSource2: " + e.getMessage(), e); - return null; + if (e instanceof ParserException) throw (ParserException) e; + + // log unexpected error + String errorMsg = "Unexpected exception. " + e.getMessage(); + this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e); + throw new ParserException(errorMsg,location,e); + } finally { if (theParser != null) { - try { plasmaParser.theParserPool.returnObject(mimeType, theParser); } catch (Exception e) { } + try { plasmaParser.theParserPool.returnObject(mimeType, theParser); } catch (Exception e) { /* ignore this */} } } } - private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, File sourceFile) throws IOException { + private plasmaParserDocument parseHtml(URL location, String mimeType, String documentCharset, File sourceFile) throws IOException, ParserException { // ...otherwise we make a scraper and transformer FileInputStream fileIn = new FileInputStream(sourceFile); @@ -596,8 +640,9 @@ public final class plasmaParser { //serverFileUtils.copy(sourceFile, hfos); //hfos.close(); if (writer.binarySuspect()) { - this.theLogger.logInfo("Binary data found in URL " + location); - return null; + String errorMsg = "Binary data found in resource"; + this.theLogger.logSevere("Unable to parse '" + location + "'. 
" + errorMsg); + throw new ParserException(errorMsg,location); } return transformScraper(location, mimeType, documentCharset, scraper); } diff --git a/source/de/anomic/plasma/plasmaSearchImages.java b/source/de/anomic/plasma/plasmaSearchImages.java index 3782ff752..a7387604b 100644 --- a/source/de/anomic/plasma/plasmaSearchImages.java +++ b/source/de/anomic/plasma/plasmaSearchImages.java @@ -43,6 +43,8 @@ package de.anomic.plasma; import java.net.MalformedURLException; import de.anomic.net.URL; +import de.anomic.plasma.parser.ParserException; + import java.util.Iterator; import java.util.Map; import java.util.TreeSet; @@ -60,10 +62,16 @@ public final class plasmaSearchImages { if (maxTime > 10) { byte[] res = sc.getResource(url, true, (int) maxTime); if (res != null) { - plasmaParserDocument document = sc.parseDocument(url, res); - + plasmaParserDocument document = null; + try { + document = sc.parseDocument(url, res); + } catch (ParserException e) { + // parsing failed + } + if (document == null) return; + // add the image links - if (document != null) this.addAll(document.getImages()); + this.addAll(document.getImages()); // add also links from pages one step deeper, if depth > 0 if (depth > 0) { diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 60e4f3e60..efed8fbba 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -45,6 +45,8 @@ package de.anomic.plasma; import java.io.IOException; import de.anomic.net.URL; import de.anomic.plasma.cache.IResourceInfo; +import de.anomic.plasma.crawler.plasmaCrawlerException; +import de.anomic.plasma.parser.ParserException; import java.util.Enumeration; import java.util.HashMap; @@ -164,30 +166,51 @@ public class plasmaSnippetCache { return new Snippet(line, source, null); } + /* =========================================================================== + * LOADING RESOURCE DATA + * 
=========================================================================== */ // if the snippet is not in the cache, we can try to get it from the htcache byte[] resource = null; IResourceInfo docInfo = null; try { + // trying to load the resource from the cache resource = this.cacheManager.loadResourceContent(url); - if ((fetchOnline) && (resource == null)) { + docInfo = this.cacheManager.loadResourceInfo(url); + + // if not found try to download it + if ((resource == null) && (fetchOnline)) { + // download resource using the crawler plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000); + + // getting resource metadata (e.g. the http headers for http resources) if (entry != null) { docInfo = entry.getDocumentInfo(); } + + // now the resource should be stored in the cache, load body resource = this.cacheManager.loadResourceContent(url); + if (resource == null) { + //System.out.println("cannot load document for URL " + url); + return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL"); + } source = SOURCE_WEB; } - } catch (IOException e) { - e.printStackTrace(); + } catch (Exception e) { + if (!(e instanceof plasmaCrawlerException)) e.printStackTrace(); return new Snippet(null, ERROR_SOURCE_LOADING, "error loading resource from web: " + e.getMessage()); } - if (resource == null) { - //System.out.println("cannot load document for URL " + url); - return new Snippet(null, ERROR_RESOURCE_LOADING, "error loading resource from web, cacheManager returned NULL"); - } - plasmaParserDocument document = parseDocument(url, resource, docInfo); + /* =========================================================================== + * PARSING RESOURCE + * =========================================================================== */ + plasmaParserDocument document = null; + try { + document = parseDocument(url, resource, docInfo); + } catch (ParserException e) { + return new Snippet(null, ERROR_PARSER_FAILED, e.getMessage()); 
// cannot be parsed + } if (document == null) return new Snippet(null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed + //System.out.println("loaded document for URL " + url); String[] sentences = document.getSentences(); //System.out.println("----" + url.toString()); for (int l = 0; l < sentences.length; l++) System.out.println(sentences[l]); @@ -196,6 +219,9 @@ public class plasmaSnippetCache { return new Snippet(null, ERROR_PARSER_NO_LINES, "parser returned no sentences"); } + /* =========================================================================== + * COMPUTE SNIPPET + * =========================================================================== */ // we have found a parseable non-empty file: use the lines line = computeSnippet(sentences, queryhashes, 8 + 6 * queryhashes.size(), snippetMaxLength); //System.out.println("loaded snippet for URL " + url + ": " + line); @@ -207,22 +233,48 @@ public class plasmaSnippetCache { return new Snippet(line, source, null); } + /** + * Tries to load and parse a resource specified by it's URL. + * If the resource is not stored in cache and if fetchOnline is set the + * this function tries to download the resource from web. + * + * @param url the URL of the resource + * @param fetchOnline specifies if the resource should be loaded from web if it'as not available in the cache + * @return the parsed document as {@link plasmaParserDocument} + */ public plasmaParserDocument retrieveDocument(URL url, boolean fetchOnline) { byte[] resource = null; IResourceInfo docInfo = null; try { + // trying to load the resource body from cache resource = this.cacheManager.loadResourceContent(url); + + // if not available try to load resource from web if ((fetchOnline) && (resource == null)) { + // download resource using crawler plasmaHTCache.Entry entry = loadResourceFromWeb(url, 5000); + + // fetching metadata of the resource (e.g. 
http headers for http resource) if (entry != null) docInfo = entry.getDocumentInfo(); + + // getting the resource body from the cache resource = this.cacheManager.loadResourceContent(url); + } else { + // trying to load resource metadata + docInfo = this.cacheManager.loadResourceInfo(url); } - } catch (IOException e) { - e.printStackTrace(); + + // parsing document + if (resource == null) return null; + return parseDocument(url, resource, docInfo); + } catch (ParserException e) { + this.log.logWarning("Unable to parse resource. " + e.getMessage()); + return null; + } catch (Exception e) { + this.log.logWarning("Unexpected error while retrieving document. " + e.getMessage(),e); return null; } - if (resource == null) return null; - return parseDocument(url, resource, docInfo); + } public void storeToCache(String wordhashes, String urlhash, String snippet) { @@ -374,11 +426,11 @@ public class plasmaSnippetCache { return map; } - public plasmaParserDocument parseDocument(URL url, byte[] resource) { + public plasmaParserDocument parseDocument(URL url, byte[] resource) throws ParserException { return parseDocument(url, resource, null); } - public plasmaParserDocument parseDocument(URL url, byte[] resource, IResourceInfo docInfo) { + public plasmaParserDocument parseDocument(URL url, byte[] resource, IResourceInfo docInfo) throws ParserException { try { if (resource == null) return null; @@ -425,9 +477,15 @@ public class plasmaSnippetCache { public byte[] getResource(URL url, boolean fetchOnline, int socketTimeout) { // load the url as resource from the web try { + // trying to load the resource body from cache byte[] resource = cacheManager.loadResourceContent(url); + + // if the content is not available in cache try to download it from web if ((fetchOnline) && (resource == null)) { + // try to download the resource using a crawler loadResourceFromWeb(url, (socketTimeout < 0) ? 
-1 : socketTimeout); + + // get the content from cache resource = cacheManager.loadResourceContent(url); } return resource; @@ -436,7 +494,7 @@ public class plasmaSnippetCache { } } - public plasmaHTCache.Entry loadResourceFromWeb(URL url, int socketTimeout) throws IOException { + public plasmaHTCache.Entry loadResourceFromWeb(URL url, int socketTimeout) throws plasmaCrawlerException { plasmaHTCache.Entry result = this.sb.cacheLoader.loadSync( url, diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d68ce3a0e..87c86f10e 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -144,6 +144,7 @@ import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroMapTable; import de.anomic.plasma.dbImport.dbImportManager; +import de.anomic.plasma.parser.ParserException; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverAbstractSwitch; import de.anomic.server.serverCodings; @@ -1392,7 +1393,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } } - private plasmaParserDocument parseResource(plasmaSwitchboardQueue.Entry entry, String initiatorHash) throws InterruptedException { + private plasmaParserDocument parseResource(plasmaSwitchboardQueue.Entry entry, String initiatorHash) throws InterruptedException, ParserException { plasmaParserDocument document = null; // the mimetype of this entry @@ -1402,29 +1403,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // the parser logger serverLog parserLogger = parser.getLogger(); - // if the document content is supported we can start to parse the content - if (plasmaParser.supportedContent( - entry.url(), - mimeType) - ){ - if ((entry.cacheFile().exists()) && (entry.cacheFile().length() > 0)) { - parserLogger.logFine("'" + 
entry.normalizedURLString() + "' is not parsed yet, parsing now from File"); - document = parser.parseSource(entry.url(), mimeType, charset, entry.cacheFile()); - } else { - parserLogger.logFine("'" + entry.normalizedURLString() + "' cannot be parsed, no resource available"); - addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_NOT_PARSEABLE_NO_CONTENT, new bitfield(indexURL.urlFlagLength)); - } - if (document == null) { - parserLogger.logSevere("'" + entry.normalizedURLString() + "' parse failure"); - addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_PARSER_ERROR, new bitfield(indexURL.urlFlagLength)); - } - } else { - parserLogger.logFine("'" + entry.normalizedURLString() + "'. Unsupported mimeType '" + ((mimeType == null) ? "null" : mimeType) + "'."); - addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorHash, entry.anchorName(), plasmaCrawlEURL.DENIED_WRONG_MIMETYPE_OR_EXT, new bitfield(indexURL.urlFlagLength)); - } - - checkInterruption(); - return document; + // parse the document + return parseResource(entry.url(), mimeType, charset, entry.cacheFile()); + } + + public plasmaParserDocument parseResource(URL location, String mimeType, String documentCharset, File sourceFile) throws InterruptedException, ParserException { + plasmaParserDocument doc = parser.parseSource(location, mimeType, documentCharset, sourceFile); + assert(doc != null) : "Unexpected error. 
Parser returned null."; + return doc; } private void processResourceStack(plasmaSwitchboardQueue.Entry entry) throws InterruptedException { @@ -1471,8 +1457,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser plasmaParserDocument document = null; parsingStartTime = System.currentTimeMillis(); + try { document = this.parseResource(entry, initiatorPeerHash); if (document == null) return; + } catch (ParserException e) { + this.log.logInfo("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage()); + addURLtoErrorDB(entry.url(), entry.referrerHash(), initiatorPeerHash, entry.anchorName(), e.getErrorCode(), new bitfield(indexURL.urlFlagLength)); + return; + } parsingEndTime = System.currentTimeMillis(); @@ -2172,16 +2164,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // determine the url string plasmaCrawlLURL.Entry entry = urlPool.loadedURL.load(urlhash, null); if (entry == null) return 0; + URL url = entry.url(); if (url == null) return 0; - // get set of words - // Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline))); - Iterator witer = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline, 10000)).getText()); - // delete all word references - int count = removeReferences(urlhash, witer); - // finally delete the url entry itself - urlPool.loadedURL.remove(urlhash); - return count; + + try { + // get set of words + // Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline))); + Iterator witer = plasmaCondenser.getWords(snippetCache.parseDocument(url, snippetCache.getResource(url, fetchOnline, 10000)).getText()); + // delete all word references + int count = removeReferences(urlhash, witer); + // finally delete the url entry itself + urlPool.loadedURL.remove(urlhash); + return count; + } catch (ParserException e) { + return 0; + } } public int removeReferences(URL url, Set words) { diff --git 
a/source/de/anomic/server/serverByteBuffer.java b/source/de/anomic/server/serverByteBuffer.java index 295583d6a..9030ee036 100644 --- a/source/de/anomic/server/serverByteBuffer.java +++ b/source/de/anomic/server/serverByteBuffer.java @@ -188,6 +188,10 @@ public final class serverByteBuffer extends OutputStream { public serverByteBuffer append(String s) { return append(s.getBytes()); } + + public serverByteBuffer append(String s, String charset) throws UnsupportedEncodingException { + return append(s.getBytes(charset)); + } public serverByteBuffer append(serverByteBuffer bb) { return append(bb.buffer, bb.offset, bb.length); diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java index a90db5f01..974994b24 100644 --- a/source/de/anomic/server/serverFileUtils.java +++ b/source/de/anomic/server/serverFileUtils.java @@ -73,24 +73,39 @@ import de.anomic.kelondro.kelondroRowSet; public final class serverFileUtils { + private static final int DEFAULT_BUFFER_SIZE = 4096; + + public static long copy(InputStream source, OutputStream dest) throws IOException { + return copy(source,dest); + } + /** * Copies an InputStream to an OutputStream. - * @param source InputStream - * @param dest OutputStream + * @param source InputStream + * @param dest OutputStream + * @param count the total amount of bytes to copy * @return Total number of bytes copied. + * * @see copy(InputStream source, File dest) * @see copyRange(File source, OutputStream dest, int start) * @see copy(File source, OutputStream dest) * @see copy(File source, File dest) */ - public static int copy(InputStream source, OutputStream dest) throws IOException { - byte[] buffer = new byte[4096]; + public static long copy(InputStream source, OutputStream dest, long count) throws IOException { + byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; + int chunkSize = (int) ((count > 0) ? 
Math.min(count, DEFAULT_BUFFER_SIZE) : DEFAULT_BUFFER_SIZE); - int c, total = 0; - while ((c = source.read(buffer)) > 0) { + int c; long total = 0; + while ((c = source.read(buffer,0,chunkSize)) > 0) { dest.write(buffer, 0, c); dest.flush(); total += c; + + if (count > 0) { + chunkSize = (int)Math.min(count-total,DEFAULT_BUFFER_SIZE); + if (chunkSize == 0) break; + } + } dest.flush(); @@ -165,21 +180,26 @@ public final class serverFileUtils { } return count; } + + public static void copy(InputStream source, File dest) throws IOException { + copy(source,dest,-1); + } /** * Copies an InputStream to a File. * @param source InputStream * @param dest File + * @param the amount of bytes to copy * @see copy(InputStream source, OutputStream dest) * @see copyRange(File source, OutputStream dest, int start) * @see copy(File source, OutputStream dest) * @see copy(File source, File dest) */ - public static void copy(InputStream source, File dest) throws IOException { + public static void copy(InputStream source, File dest, long count) throws IOException { FileOutputStream fos = null; try { fos = new FileOutputStream(dest); - copy(source, fos); + copy(source, fos, count); } finally { if (fos != null) try {fos.close();} catch (Exception e) {} } @@ -201,7 +221,7 @@ public final class serverFileUtils { fis = new FileInputStream(source); long skipped = fis.skip(start); if (skipped != start) throw new IllegalStateException("Unable to skip '" + start + "' bytes. Only '" + skipped + "' bytes skipped."); - copy(fis, dest); + copy(fis, dest,-1); } finally { if (fis != null) try { fis.close(); } catch (Exception e) {} } @@ -220,28 +240,33 @@ public final class serverFileUtils { InputStream fis = null; try { fis = new FileInputStream(source); - copy(fis, dest); + copy(fis, dest, -1); } finally { if (fis != null) try { fis.close(); } catch (Exception e) {} } } + public static void copy(File source, File dest) throws IOException { + copy(source,dest,-1); + } + /** * Copies a File to a File. 
* @param source File * @param dest File + * @param count the amount of bytes to copy * @see copy(InputStream source, OutputStream dest) * @see copy(InputStream source, File dest) * @see copyRange(File source, OutputStream dest, int start) * @see copy(File source, OutputStream dest) */ - public static void copy(File source, File dest) throws IOException { + public static void copy(File source, File dest, long count) throws IOException { FileInputStream fis = null; FileOutputStream fos = null; try { fis = new FileInputStream(source); fos = new FileOutputStream(dest); - copy(fis, fos); + copy(fis, fos, count); } finally { if (fis != null) try {fis.close();} catch (Exception e) {} if (fos != null) try {fos.close();} catch (Exception e) {} @@ -250,7 +275,7 @@ public final class serverFileUtils { public static byte[] read(InputStream source) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - copy(source, baos); + copy(source, baos, -1); baos.close(); return baos.toByteArray(); } @@ -309,7 +334,7 @@ public final class serverFileUtils { } public static void write(byte[] source, OutputStream dest) throws IOException { - copy(new ByteArrayInputStream(source), dest); + copy(new ByteArrayInputStream(source), dest, -1); } public static void write(byte[] source, File dest) throws IOException {