From b2263bc720c473854b22c2fccb8c124cff0f7a81 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 14 Jul 2009 11:01:05 +0000
Subject: [PATCH] enhanced document type recognition

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6209 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 defaults/yacy.init                            |   1 +
 htroot/ConfigParser.java                      |   6 +-
 source/de/anomic/crawler/FTPLoader.java       |  16 +-
 source/de/anomic/crawler/HTTPLoader.java      |  10 +-
 source/de/anomic/crawler/IndexingStack.java   |   9 +-
 source/de/anomic/document/Idiom.java          |  15 +-
 source/de/anomic/document/Parser.java         | 151 ++++++++++++------
 .../de/anomic/document/parser/bzipParser.java |  29 ++--
 .../de/anomic/document/parser/docParser.java  |  37 +++--
 .../de/anomic/document/parser/gzipParser.java |  32 ++--
 .../de/anomic/document/parser/htmlParser.java |  37 +++--
 .../de/anomic/document/parser/odtParser.java  |  23 ++-
 .../de/anomic/document/parser/pdfParser.java  |  26 +--
 .../de/anomic/document/parser/pptParser.java  |  35 ++--
 .../de/anomic/document/parser/psParser.java   |  24 ++-
 .../de/anomic/document/parser/rpmParser.java  |  21 ++-
 .../de/anomic/document/parser/rssParser.java  |  27 ++--
 .../de/anomic/document/parser/rtfParser.java  |  30 ++--
 .../document/parser/sevenzipParser.java       |  18 ++-
 .../de/anomic/document/parser/swfParser.java  |  24 +--
 .../de/anomic/document/parser/tarParser.java  |  22 ++-
 .../de/anomic/document/parser/vcfParser.java  |  28 ++--
 .../de/anomic/document/parser/vsdParser.java  |  35 ++--
 .../de/anomic/document/parser/xlsParser.java  |  32 ++--
 .../de/anomic/document/parser/zipParser.java  |  31 ++--
 source/de/anomic/http/httpdProxyHandler.java  |   6 +-
 source/de/anomic/plasma/plasmaHTCache.java    |   5 -
 .../de/anomic/plasma/plasmaSwitchboard.java   |   8 +-
 source/de/anomic/search/SnippetCache.java     |  16 +-
 29 files changed, 480 insertions(+), 274 deletions(-)

diff --git a/defaults/yacy.init b/defaults/yacy.init
index 791381c24..5abb2b88f 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -250,6 +250,7 @@ minimumGlobalDelta = 500
 # the following mime-types are a blacklist for indexing:
 # parser.mime.deny: specifies mime-types that shall not be indexed
 parser.mime.deny=
+parser.extensions.deny=
 
 # Promotion Strings
 # These strings appear in the Web Mask of the YACY search client
diff --git a/htroot/ConfigParser.java b/htroot/ConfigParser.java
index 7d1775a5d..c4bfc5002 100644
--- a/htroot/ConfigParser.java
+++ b/htroot/ConfigParser.java
@@ -49,7 +49,7 @@ public class ConfigParser {
                 post.remove("parserSettings");
                 
                 for (Idiom parser: Parser.idioms()) {
-                    for (String mimeType: parser.getSupportedMimeTypes().keySet()) {
+                    for (String mimeType: parser.supportedMimeTypes()) {
                         Parser.grantMime(mimeType, post.get("mimename_" + mimeType, "").equals("on"));
                     }
                 }
@@ -62,9 +62,9 @@ public class ConfigParser {
             prop.put("parser_" + i + "_name", parser.getName());
             
             int mimeIdx = 0;
-            for (String mimeType: parser.getSupportedMimeTypes().keySet()) {
+            for (String mimeType: parser.supportedMimeTypes()) {
                 prop.put("parser_" + i + "_mime_" + mimeIdx + "_mimetype", mimeType);
-                prop.put("parser_" + i + "_mime_" + mimeIdx + "_status", (Parser.supportsMime(mimeType)) ? 1 : 0);
+                prop.put("parser_" + i + "_mime_" + mimeIdx + "_status", (Parser.supportsMime(mimeType) == null) ? 1 : 0);
                 mimeIdx++;
             }
             prop.put("parser_" + i + "_mime", mimeIdx);
diff --git a/source/de/anomic/crawler/FTPLoader.java b/source/de/anomic/crawler/FTPLoader.java
index f501c247b..e0eec059e 100644
--- a/source/de/anomic/crawler/FTPLoader.java
+++ b/source/de/anomic/crawler/FTPLoader.java
@@ -224,16 +224,12 @@ public class FTPLoader {
         // if the mimetype and file extension is supported we start to download
         // the file
         httpDocument htCache = null;
-        if (!Parser.supportsExtension(entryUrl)) {
-            // if the response has not the right file type then reject file
-            log.logInfo("REJECTED WRONG EXTENSION TYPE " + mimeType + " for URL " + entry.url().toString());
-            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "wrong extension");
-            throw new Exception("response has not the right extension type -> rejected");
-        } else if (!Parser.supportsMime(mimeType)) {
-            // if the response has not the right file type then reject file
-            log.logInfo("REJECTED WRONG MIME TYPE " + mimeType + " for URL " + entry.url().toString());
-            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "wrong mime type");
-            throw new Exception("response has not the right mime type -> rejected");
+        String supportError = Parser.supports(entryUrl, mimeType);
+        if (supportError != null) {
+            // reject file
+            log.logInfo("PARSER REJECTED URL " + entry.url().toString() + ": " + supportError);
+            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, supportError);
+            throw new Exception(supportError);
         } else {
             // abort the download if content is too long
             final int size = ftpClient.fileSize(path);
diff --git a/source/de/anomic/crawler/HTTPLoader.java b/source/de/anomic/crawler/HTTPLoader.java
index cb083537e..acdadf150 100644
--- a/source/de/anomic/crawler/HTTPLoader.java
+++ b/source/de/anomic/crawler/HTTPLoader.java
@@ -120,8 +120,9 @@ public final class HTTPLoader {
         if (port < 0) port = (ssl) ? 443 : 80;
         
         // if not the right file type then reject file
-        if (!Parser.supportsExtension(entry.url())) {
-            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong extension");
+        String supportError = Parser.supportsExtension(entry.url());
+        if (supportError != null) {
+            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
             throw new IOException("REJECTED WRONG EXTENSION TYPE " + entry.url().getFileExtension()+ " for URL " + entry.url().toString());
         } 
         
@@ -166,8 +167,9 @@ public final class HTTPLoader {
                     //try {
                     
                 	// if the response has not the right file type then reject file
-                    if (!Parser.supportsMime(res.getResponseHeader().mime())) {
-                    	sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong mime type");
+                    supportError = Parser.supports(entry.url(), res.getResponseHeader().mime());
+                    if (supportError != null) {
+                    	sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
                     	throw new IOException("REJECTED WRONG MIME TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
                     }
 
diff --git a/source/de/anomic/crawler/IndexingStack.java b/source/de/anomic/crawler/IndexingStack.java
index 47b1aa19b..969dec3cb 100644
--- a/source/de/anomic/crawler/IndexingStack.java
+++ b/source/de/anomic/crawler/IndexingStack.java
@@ -34,6 +34,7 @@ import java.util.Date;
 import java.util.Iterator;
 import java.util.concurrent.ConcurrentHashMap;
 
+import de.anomic.document.Parser;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpResponseHeader;
 import de.anomic.kelondro.index.Row;
@@ -480,8 +481,9 @@ public class IndexingStack {
                 if (plasmaHTCache.isPicture(mimeType)) {
                     return "Media_Content_(Picture)";
                 }
-                if (!plasmaHTCache.isText(mimeType)) {
-                    return "Media_Content_(not_text)";
+                String parserError = Parser.supportsMime(mimeType);
+                if (parserError != null) {
+                    return "Media_Content, no parser: " + parserError;
                 }
     
                 // -if-modified-since in request
@@ -598,7 +600,8 @@ public class IndexingStack {
             if (responseHeader != null) {
                 final String mimeType = responseHeader.mime();
                 if (plasmaHTCache.isPicture(mimeType)) { return "Media_Content_(Picture)"; }
-                if (!plasmaHTCache.isText(mimeType)) { return "Media_Content_(not_text)"; }
+                String parserError = Parser.supportsMime(mimeType);
+                if (parserError != null) { return "Media_Content, parser error: " + parserError; }
             }
             if (plasmaHTCache.noIndexingURL(url())) { return "Media_Content_(forbidden)"; }
 
diff --git a/source/de/anomic/document/Idiom.java b/source/de/anomic/document/Idiom.java
index 5ab8405ee..de54e34f9 100644
--- a/source/de/anomic/document/Idiom.java
+++ b/source/de/anomic/document/Idiom.java
@@ -27,8 +27,7 @@ package de.anomic.document;
 
 import java.io.File;
 import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Hashtable;
+import java.util.Set;
 
 import de.anomic.yacy.yacyURL;
 
@@ -87,11 +86,15 @@ public interface Idiom {
             
     /**
      * Get the MimeType(s) that are supported by the parser
-     * @return a {@link Hashtable} containing a mapping from a mime type string
-     * to a comma-separated String of file extensions
-     * that are supported by the idiom parser
+     * @return a set of strings denoting the supported mime types
      */
-    public HashMap<String, String> getSupportedMimeTypes();
+    public Set<String> supportedMimeTypes();
+    
+    /**
+     * Get the File extension(s) that are supported by the parser
+     * @return a set of strings denoting the supported file extensions
+     */
+    public Set<String> supportedExtensions();
     
     /**
      * This function should be called before reusing the parser object.
diff --git a/source/de/anomic/document/Parser.java b/source/de/anomic/document/Parser.java
index 002974d1d..c316f0137 100644
--- a/source/de/anomic/document/Parser.java
+++ b/source/de/anomic/document/Parser.java
@@ -72,8 +72,9 @@ public final class Parser {
     }
     
     private static final Map<String, Idiom> mime2parser = new TreeMap<String, Idiom>(insensitiveCollator);
-    private static final Map<String, Set<String>> ext2mime = new TreeMap<String, Set<String>>(insensitiveCollator);
+    private static final Map<String, String> ext2mime = new TreeMap<String, String>(insensitiveCollator);
     private static final Set<String> denyMime = new TreeSet<String>(insensitiveCollator);
+    private static final Set<String> denyExtension = new TreeSet<String>(insensitiveCollator);
     
     static {
         initParser(new bzipParser());
@@ -103,21 +104,20 @@ public final class Parser {
     }
 
     private static void initParser(Idiom parser) {
-        for (Map.Entry<String, String> e: parser.getSupportedMimeTypes().entrySet()) {
+        String prototypeMime = null;
+        for (String mime: parser.supportedMimeTypes()) {
             // process the mime types
-            final String mimeType = normalizeMimeType(e.getKey());
+            final String mimeType = normalizeMimeType(mime);
+            if (prototypeMime == null) prototypeMime = mimeType;
             Idiom p0 = mime2parser.get(mimeType);
             if (p0 != null) log.logSevere("parser for mime '" + mimeType + "' was set to '" + p0.getName() + "', overwriting with new parser '" + parser.getName() + "'.");
             mime2parser.put(mimeType, parser);
             Log.logInfo("PARSER", "Parser for mime type '" + mimeType + "': " + parser.getName());
 
-            // process the extensions
-            String[] exts = e.getValue().split(",");
-            for (String ext: exts) {
-                Set<String> s = ext2mime.get(ext);
-                if (s == null) s = new HashSet<String>();
-                s.add(mimeType);
-                ext2mime.put(ext, s);
+            if (prototypeMime != null) for (String ext: parser.supportedExtensions()) { // runs once per mime type of this parser, so the same mapping is registered repeatedly
+                String s = ext2mime.get(ext);
+                if (s != null && !s.equals(prototypeMime)) log.logSevere("parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'."); // only a different mime is a real conflict
+                ext2mime.put(ext, prototypeMime);
+            }
         }
     }
@@ -148,9 +148,12 @@ public final class Parser {
         }
     }
 
-    public static Document parseSource(final yacyURL location,
-            final String mimeType, final String charset,
-            final File sourceFile) throws InterruptedException, ParserException {
+    public static Document parseSource(
+            final yacyURL location,
+            final String mimeType,
+            final String charset,
+            final File sourceFile
+        ) throws InterruptedException, ParserException {
 
         BufferedInputStream sourceStream = null;
         try {
@@ -174,39 +177,33 @@ public final class Parser {
         }
     }
 
-    public static Document parseSource(final yacyURL location,
-            String mimeType, final String charset,
-            final long contentLength, final InputStream sourceStream)
-            throws InterruptedException, ParserException {
+    public static Document parseSource(
+            final yacyURL location,
+            String mimeType,
+            final String charset,
+            final long contentLength,
+            final InputStream sourceStream
+        ) throws InterruptedException, ParserException {
         try {
             if (log.isFine()) log.logFine("Parsing '" + location + "' from stream");
             mimeType = normalizeMimeType(mimeType);
             final String fileExt = location.getFileExtension();
             final String documentCharset = htmlParser.patchCharsetEncoding(charset);
-            if (!supportsMime(mimeType)) {
-                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "'";
-                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-                throw new ParserException(errorMsg, location);
-            }
-            if (!supportsExtension(location)) {
-                final String errorMsg = "No parser available to parse extension of url path";
+            Idiom parser = idiomParser(location, mimeType);
+            
+            if (parser == null) {
+                final String errorMsg = "No parser available to parse extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "'";
                 log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
                 throw new ParserException(errorMsg, location);
             }
+            
             if (log.isFine()) log.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
-            Idiom parser = mime2parser.get(normalizeMimeType(mimeType));
-            Document doc = null;
-            if (parser != null) {
-                parser.setContentLength(contentLength);
-                doc = parser.parse(location, mimeType, documentCharset, sourceStream);
-            } else {
-                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
-                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-                throw new ParserException(errorMsg, location);
-            }
+            parser.setContentLength(contentLength);
+            Document doc = parser.parse(location, mimeType, documentCharset, sourceStream);
+
             if (doc == null) {
-                final String errorMsg = "Unexpected error. Parser returned null.";
-                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                final String errorMsg = "Parsing content with file extension '" + location.getFileExtension() + "' and mimetype '" + mimeType + "' failed: document == null";
+                log.logWarning("Unable to parse '" + location + "'. " + errorMsg);
                 throw new ParserException(errorMsg, location);
             }
             return doc;
@@ -218,16 +215,66 @@ public final class Parser {
             throw new ParserException(errorMsg, location);
         }
     }
-
-    public static boolean supportsMime(String mimeType) {
+    
+    /**
+     * Check if the parser supports the given content.
+     * @param url the location of the content; its file extension is consulted when the mime type yields no parser
+     * @param mimeType the mime type of the content, may be null
+     * @return null if the content is supported. If the content is not supported, an error string describing the reason.
+     */
+    public static String supports(final yacyURL url, String mimeType) {
+        try {
+            // try to get a parser. If this works, we don't need the parser itself, we just return null to show that everything is ok.
+            idiomParser(url, mimeType);
+            return null;
+        } catch (ParserException e) {
+            // in case a parser is not available, return an error string describing the problem.
+            return e.getMessage();
+        }
+    }
+    
+    private static Idiom idiomParser(final yacyURL url, String mimeType) throws ParserException {
+        // first attempt: select the parser by the given mime type, unless that mime type is on the deny list
+        if (mimeType != null) {
+            mimeType = normalizeMimeType(mimeType);
+            if (denyMime.contains(mimeType)) throw new ParserException("mime type '" + mimeType + "' is denied", url);
+        } else {
+            mimeType = normalizeMimeType(mimeType);
+        }
+        
+        Idiom idiom = mime2parser.get(mimeType);
+        if (idiom != null) return idiom;
+        
+        // fallback: derive a mime type from the file extension of the url
+        String ext = url.getFileExtension();
+        if (ext == null || ext.length() == 0) throw new ParserException("no file extension", url);
+        if (denyExtension.contains(ext)) throw new ParserException("file extension '" + ext + "' is denied", url);
+        mimeType = ext2mime.get(ext);
+        if (mimeType == null) throw new ParserException("no parser available", url);
+        if (denyMime.contains(mimeType)) throw new ParserException("mime type '" + mimeType + "' is denied", url); // the deny list must also hold for the extension-derived mime type
+        idiom = mime2parser.get(mimeType);
+        if (idiom == null) throw new ParserException("no parser available (internal error!)", url);
+        return idiom;
+    }
+    
+    public static String supportsMime(String mimeType) {
+        if (mimeType == null) return null;
         mimeType = normalizeMimeType(mimeType);
-        return !denyMime.contains(mimeType) && mime2parser.containsKey(normalizeMimeType(mimeType));
+        if (denyMime.contains(mimeType)) return "mime type '" + mimeType + "' is denied";
+        if (mime2parser.get(mimeType) == null) return "no parser for mime '" + mimeType + "' available";
+        return null;
     }
     
-    public static boolean supportsExtension(final yacyURL url) {
+    public static String supportsExtension(final yacyURL url) {
         String ext = url.getFileExtension();
-        if (ext.length() == 0) return true; // may be anything; thats ok if the mime type is ok
-        return ext2mime.containsKey(ext);
+        if (ext == null || ext.length() == 0) return null;
+        if (denyExtension.contains(ext)) return "file extension '" + ext + "' is denied";
+        String mimeType = ext2mime.get(ext);
+        if (mimeType == null) return "no parser available";
+        Idiom idiom = mime2parser.get(mimeType);
+        assert idiom != null;
+        if (idiom == null) return "no parser available (internal error!)";
+        return null;
     }
     
     public static String mimeOf(yacyURL url) {
@@ -235,9 +282,7 @@ public final class Parser {
     }
     
     public static String mimeOf(String ext) {
-        Set<String> mimes = ext2mime.get(ext);
-        if (mimes == null) return null;
-        return mimes.iterator().next();
+        return ext2mime.get(ext);
     }
     
     private static String normalizeMimeType(String mimeType) {
@@ -261,4 +306,20 @@ public final class Parser {
     public static void grantMime(String mime, boolean grant) {
         if (grant) denyMime.remove(normalizeMimeType(mime)); else denyMime.add(normalizeMimeType(mime));
     }
+    
+    public static void setDenyExtension(String denyList) { // denyList: comma-separated file extensions; null or empty clears the deny list
+        denyExtension.clear();
+        if (denyList != null) for (String s: denyList.split(",")) if (s.trim().length() > 0) denyExtension.add(s.trim()); // "".split(",") yields [""], skip such empty entries
+    }
+    
+    public static String getDenyExtension() { // returns the denied extensions as comma-separated list, "" if none are denied
+        final StringBuilder s = new StringBuilder();
+        for (String d: denyExtension) s.append(d).append(',');
+        if (s.length() > 0) s.setLength(s.length() - 1); // strip the trailing comma; an empty deny list must not fail here
+        return s.toString();
+    }
+    
+    public static void grantExtension(String ext, boolean grant) { // grant == true removes ext from the deny list, false adds it
+        if (!grant) denyExtension.add(ext); else denyExtension.remove(ext);
+    }
 }
diff --git a/source/de/anomic/document/parser/bzipParser.java b/source/de/anomic/document/parser/bzipParser.java
index 0fec1c9e1..a0b4a3056 100644
--- a/source/de/anomic/document/parser/bzipParser.java
+++ b/source/de/anomic/document/parser/bzipParser.java
@@ -30,7 +30,9 @@ package de.anomic.document.parser;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.apache.tools.bzip2.CBZip2InputStream;
 
 import de.anomic.document.AbstractParser;
@@ -47,24 +49,31 @@ public class bzipParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();    
-    static final String fileExtensions = "bz2,tbz,tbz2";
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/bzip2", fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-bz2", fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-bzip",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-stuffit",fileExtensions);
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("bz2");
+        SUPPORTED_EXTENSIONS.add("tbz");
+        SUPPORTED_EXTENSIONS.add("tbz2");
+        SUPPORTED_MIME_TYPES.add("application/x-bzip2");
+        SUPPORTED_MIME_TYPES.add("application/bzip2");
+        SUPPORTED_MIME_TYPES.add("application/x-bz2");
+        SUPPORTED_MIME_TYPES.add("application/x-bzip");
+        SUPPORTED_MIME_TYPES.add("application/x-stuffit");
     }
     
     public bzipParser() {        
         super("Bzip 2 UNIX Compressed File Parser");
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
         
         File tempFile = null;
diff --git a/source/de/anomic/document/parser/docParser.java b/source/de/anomic/document/parser/docParser.java
index fafd07133..423b3197f 100644
--- a/source/de/anomic/document/parser/docParser.java
+++ b/source/de/anomic/document/parser/docParser.java
@@ -30,7 +30,8 @@ package de.anomic.document.parser;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
 
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
@@ -45,18 +46,20 @@ public class docParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */    
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        String ext = "doc,docx";
-        SUPPORTED_MIME_TYPES.put("application/msword",ext);
-        SUPPORTED_MIME_TYPES.put("application/doc",ext);
-        SUPPORTED_MIME_TYPES.put("appl/text",ext);
-        SUPPORTED_MIME_TYPES.put("application/vnd.msword",ext);
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-word",ext);
-        SUPPORTED_MIME_TYPES.put("application/winword",ext);
-        SUPPORTED_MIME_TYPES.put("application/word",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-msw6",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-msword",ext);
+        SUPPORTED_EXTENSIONS.add("doc");
+        SUPPORTED_EXTENSIONS.add("docx");
+        SUPPORTED_MIME_TYPES.add("application/msword");
+        SUPPORTED_MIME_TYPES.add("application/doc");
+        SUPPORTED_MIME_TYPES.add("appl/text");
+        SUPPORTED_MIME_TYPES.add("application/vnd.msword");
+        SUPPORTED_MIME_TYPES.add("application/vnd.ms-word");
+        SUPPORTED_MIME_TYPES.add("application/winword");
+        SUPPORTED_MIME_TYPES.add("application/word");
+        SUPPORTED_MIME_TYPES.add("application/x-msw6");
+        SUPPORTED_MIME_TYPES.add("application/x-msword");
     }
     
 	public docParser() {
@@ -115,9 +118,13 @@ public class docParser extends AbstractParser implements Idiom {
         return theDoc;
 	}
 
-	public HashMap<String, String> getSupportedMimeTypes() {
-		return docParser.SUPPORTED_MIME_TYPES;
-	}
+	public Set<String> supportedMimeTypes() {
+        return SUPPORTED_MIME_TYPES;
+    }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
     @Override
 	public void reset() {
diff --git a/source/de/anomic/document/parser/gzipParser.java b/source/de/anomic/document/parser/gzipParser.java
index d3e7446c7..2963677b2 100644
--- a/source/de/anomic/document/parser/gzipParser.java
+++ b/source/de/anomic/document/parser/gzipParser.java
@@ -30,7 +30,8 @@ package de.anomic.document.parser;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.zip.GZIPInputStream;
 
 import de.anomic.document.AbstractParser;
@@ -47,28 +48,31 @@ public class gzipParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static final String ext = "gz,tgz";
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-gzip",ext);
-        SUPPORTED_MIME_TYPES.put("application/gzip",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-gunzip",ext);
-        SUPPORTED_MIME_TYPES.put("application/gzipped",ext);
-        SUPPORTED_MIME_TYPES.put("application/gzip-compressed",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-compressed",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-compress",ext);
-        SUPPORTED_MIME_TYPES.put("gzip/document",ext);
-        SUPPORTED_MIME_TYPES.put("application/octet-stream",ext);
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("gz");
+        SUPPORTED_EXTENSIONS.add("tgz");
+        SUPPORTED_MIME_TYPES.add("application/x-gzip");
+        SUPPORTED_MIME_TYPES.add("application/gzip");
+        SUPPORTED_MIME_TYPES.add("application/x-gunzip");
+        SUPPORTED_MIME_TYPES.add("application/gzipped");
+        SUPPORTED_MIME_TYPES.add("application/gzip-compressed");
+        SUPPORTED_MIME_TYPES.add("gzip/document");
     }     
 
     public gzipParser() {        
         super("GNU Zip Compressed Archive Parser");
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
         
         File tempFile = null;
diff --git a/source/de/anomic/document/parser/htmlParser.java b/source/de/anomic/document/parser/htmlParser.java
index f7e6fadb8..3e686eb93 100644
--- a/source/de/anomic/document/parser/htmlParser.java
+++ b/source/de/anomic/document/parser/htmlParser.java
@@ -31,7 +31,9 @@ import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Document;
 import de.anomic.document.Idiom;
@@ -48,17 +50,29 @@ public class htmlParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();  
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        String ext = "htm,html,shtml,xhtml,php,asp,aspx,txt,jsp,csv,pl,py";
-        SUPPORTED_MIME_TYPES.put("application/xhtml+xml", ext);
-        SUPPORTED_MIME_TYPES.put("text/html", ext);
-        SUPPORTED_MIME_TYPES.put("text/plain", ext);
-        SUPPORTED_MIME_TYPES.put("text/sgml",ext);
+        SUPPORTED_EXTENSIONS.add("htm");
+        SUPPORTED_EXTENSIONS.add("html");
+        SUPPORTED_EXTENSIONS.add("shtml");
+        SUPPORTED_EXTENSIONS.add("xhtml");
+        SUPPORTED_EXTENSIONS.add("php");
+        SUPPORTED_EXTENSIONS.add("asp");
+        SUPPORTED_EXTENSIONS.add("aspx");
+        SUPPORTED_EXTENSIONS.add("txt");
+        SUPPORTED_EXTENSIONS.add("jsp");
+        SUPPORTED_EXTENSIONS.add("csv");
+        SUPPORTED_EXTENSIONS.add("pl");
+        SUPPORTED_EXTENSIONS.add("py");
+        SUPPORTED_MIME_TYPES.add("application/xhtml+xml");
+        SUPPORTED_MIME_TYPES.add("text/html");
+        SUPPORTED_MIME_TYPES.add("text/plain");
+        SUPPORTED_MIME_TYPES.add("text/sgml");
     }
     
     public htmlParser() {
-        super("streaming html parser"); 
+        super("HTML Parser"); 
     }
     
     @Override
@@ -213,9 +227,12 @@ public class htmlParser extends AbstractParser implements Idiom {
         return encoding;
     }
 
-    
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
 }
diff --git a/source/de/anomic/document/parser/odtParser.java b/source/de/anomic/document/parser/odtParser.java
index ab91ead4e..a65244f99 100644
--- a/source/de/anomic/document/parser/odtParser.java
+++ b/source/de/anomic/document/parser/odtParser.java
@@ -35,7 +35,6 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.Enumeration;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.zip.ZipEntry;
@@ -63,20 +62,30 @@ public class odtParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt,ods,odp");
-        SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt,ods,odp");
-    }     
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("odt");
+        SUPPORTED_EXTENSIONS.add("ods");
+        SUPPORTED_EXTENSIONS.add("odp");
+        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text");
+        SUPPORTED_MIME_TYPES.add("application/x-vnd.oasis.opendocument.text");
+        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation");
+        SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet");
+    }
 
     public odtParser() {        
         super("OASIS OpenDocument V2 Text Document Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     @Override
     public Document parse(final yacyURL location, final String mimeType, final String charset, final File dest) throws ParserException, InterruptedException {
         
diff --git a/source/de/anomic/document/parser/pdfParser.java b/source/de/anomic/document/parser/pdfParser.java
index 1f3fad031..e5dd5b089 100644
--- a/source/de/anomic/document/parser/pdfParser.java
+++ b/source/de/anomic/document/parser/pdfParser.java
@@ -33,7 +33,9 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.pdfbox.pdfparser.PDFParser;
 import org.pdfbox.pdmodel.PDDocument;
 import org.pdfbox.pdmodel.PDDocumentInformation;
@@ -55,24 +57,30 @@ public class pdfParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        SUPPORTED_MIME_TYPES.put("application/pdf","pdf");
-        SUPPORTED_MIME_TYPES.put("application/x-pdf","pdf");
-        SUPPORTED_MIME_TYPES.put("application/acrobat","pdf");
-        SUPPORTED_MIME_TYPES.put("applications/vnd.pdf","pdf");
-        SUPPORTED_MIME_TYPES.put("text/pdf","pdf");
-        SUPPORTED_MIME_TYPES.put("text/x-pdf","pdf");
+        SUPPORTED_EXTENSIONS.add("pdf");
+        SUPPORTED_MIME_TYPES.add("application/pdf");
+        SUPPORTED_MIME_TYPES.add("application/x-pdf");
+        SUPPORTED_MIME_TYPES.add("application/acrobat");
+        SUPPORTED_MIME_TYPES.add("application/vnd.pdf");
+        SUPPORTED_MIME_TYPES.add("text/pdf");
+        SUPPORTED_MIME_TYPES.add("text/x-pdf");
     }
     
     public pdfParser() {        
         super("Acrobat Portable Document Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
         
         PDDocument theDocument = null;
diff --git a/source/de/anomic/document/parser/pptParser.java b/source/de/anomic/document/parser/pptParser.java
index 84067e1c7..b6d004d89 100644
--- a/source/de/anomic/document/parser/pptParser.java
+++ b/source/de/anomic/document/parser/pptParser.java
@@ -29,7 +29,9 @@ package de.anomic.document.parser;
 
 import java.io.BufferedInputStream;
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 
 import de.anomic.document.AbstractParser;
@@ -44,17 +46,20 @@ public class pptParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static final String ext = "ppt,pptx,pps";
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/mspowerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/powerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-powerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/ms-powerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/mspowerpnt",ext);
-        SUPPORTED_MIME_TYPES.put("application/vnd-mspowerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-powerpoint",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-m",ext);
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("ppt");
+        SUPPORTED_EXTENSIONS.add("pptx");
+        SUPPORTED_EXTENSIONS.add("pps");
+        SUPPORTED_MIME_TYPES.add("application/mspowerpoint");
+        SUPPORTED_MIME_TYPES.add("application/powerpoint");
+        SUPPORTED_MIME_TYPES.add("application/vnd.ms-powerpoint");
+        SUPPORTED_MIME_TYPES.add("application/ms-powerpoint");
+        SUPPORTED_MIME_TYPES.add("application/mspowerpnt");
+        SUPPORTED_MIME_TYPES.add("application/vnd-mspowerpoint");
+        SUPPORTED_MIME_TYPES.add("application/x-powerpoint");
+        SUPPORTED_MIME_TYPES.add("application/x-m");
    }
 
     public pptParser(){
@@ -114,9 +119,13 @@ public class pptParser extends AbstractParser implements Idiom {
         }
     }
 
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
     @Override
     public void reset(){
diff --git a/source/de/anomic/document/parser/psParser.java b/source/de/anomic/document/parser/psParser.java
index 7a1652bf9..c6c8fdf09 100644
--- a/source/de/anomic/document/parser/psParser.java
+++ b/source/de/anomic/document/parser/psParser.java
@@ -34,7 +34,9 @@ import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
@@ -48,12 +50,14 @@ public class psParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();   
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/ps","ps");
-        SUPPORTED_MIME_TYPES.put("application/x-postscript","ps");
-        SUPPORTED_MIME_TYPES.put("application/x-ps","ps");
-        SUPPORTED_MIME_TYPES.put("application/x-postscript-not-eps","ps");
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("ps");
+        SUPPORTED_MIME_TYPES.add("application/ps");
+        SUPPORTED_MIME_TYPES.add("application/x-postscript");
+        SUPPORTED_MIME_TYPES.add("application/x-ps");
+        SUPPORTED_MIME_TYPES.add("application/x-postscript-not-eps");
     }
     
     private final static Object modeScan = new Object();
@@ -69,10 +73,14 @@ public class psParser extends AbstractParser implements Idiom {
 		}
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public boolean testForPs2Ascii() {
         try {
             String procOutputLine = null;
diff --git a/source/de/anomic/document/parser/rpmParser.java b/source/de/anomic/document/parser/rpmParser.java
index 452bc1572..8acec6e59 100644
--- a/source/de/anomic/document/parser/rpmParser.java
+++ b/source/de/anomic/document/parser/rpmParser.java
@@ -31,6 +31,9 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.InputStream;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import com.jguild.jrpm.io.RPMFile;
 import com.jguild.jrpm.io.datatype.DataTypeIf;
 
@@ -55,21 +58,27 @@ public class rpmParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();   
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
-        SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");    
-        SUPPORTED_MIME_TYPES.put("application/x-redhat-package-manager","rpm");         
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("rpm");
+        SUPPORTED_MIME_TYPES.add("application/x-rpm");
+        SUPPORTED_MIME_TYPES.add("application/x-redhat packet manager");    
+        SUPPORTED_MIME_TYPES.add("application/x-redhat-package-manager");         
     }
     
     public rpmParser() {        
         super("rpm Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset,
             final InputStream source) throws ParserException {
         File dstFile = null;
diff --git a/source/de/anomic/document/parser/rssParser.java b/source/de/anomic/document/parser/rssParser.java
index d893b6ca8..834d16879 100644
--- a/source/de/anomic/document/parser/rssParser.java
+++ b/source/de/anomic/document/parser/rssParser.java
@@ -33,8 +33,10 @@ import java.io.InputStream;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.Set;
 
 import de.anomic.content.RSSMessage;
 import de.anomic.document.AbstractParser;
@@ -58,13 +60,16 @@ public class rssParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */  
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static final String fileExtensions = "xml,rss,rdf";
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        SUPPORTED_MIME_TYPES.put("text/rss",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/rdf+xml",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/rss+xml",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/atom+xml",fileExtensions);
+        SUPPORTED_EXTENSIONS.add("xml");
+        SUPPORTED_EXTENSIONS.add("rss");
+        SUPPORTED_EXTENSIONS.add("rdf");
+        SUPPORTED_MIME_TYPES.add("text/rss");
+        SUPPORTED_MIME_TYPES.add("application/rdf+xml");
+        SUPPORTED_MIME_TYPES.add("application/rss+xml");
+        SUPPORTED_MIME_TYPES.add("application/atom+xml");
     }
     
 	public rssParser() {
@@ -174,9 +179,13 @@ public class rssParser extends AbstractParser implements Idiom {
         }
 	}
 
-	public HashMap<String, String> getSupportedMimeTypes() {
-		return SUPPORTED_MIME_TYPES;
-	}
+	public Set<String> supportedMimeTypes() {
+        return SUPPORTED_MIME_TYPES;
+    }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
     @Override
 	public void reset() {
diff --git a/source/de/anomic/document/parser/rtfParser.java b/source/de/anomic/document/parser/rtfParser.java
index f88cfb2f0..f795904c2 100644
--- a/source/de/anomic/document/parser/rtfParser.java
+++ b/source/de/anomic/document/parser/rtfParser.java
@@ -28,7 +28,9 @@
 package de.anomic.document.parser;
 
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import javax.swing.text.DefaultStyledDocument;
 import javax.swing.text.rtf.RTFEditorKit;
 
@@ -44,13 +46,15 @@ public class rtfParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */    
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/rtf","rtf"); 
-        SUPPORTED_MIME_TYPES.put("text/rtf","rtf");
-        SUPPORTED_MIME_TYPES.put("application/x-rtf","rtf");
-        SUPPORTED_MIME_TYPES.put("text/richtext","rtf");
-        SUPPORTED_MIME_TYPES.put("application/x-soffice","rtf");
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("rtf");
+        SUPPORTED_MIME_TYPES.add("application/rtf");
+        SUPPORTED_MIME_TYPES.add("text/rtf");
+        SUPPORTED_MIME_TYPES.add("application/x-rtf");
+        SUPPORTED_MIME_TYPES.add("text/richtext");
+        SUPPORTED_MIME_TYPES.add("application/x-soffice");
     } 
 
 	public rtfParser() {
@@ -96,9 +100,13 @@ public class rtfParser extends AbstractParser implements Idiom {
 		}        
 	}
 
-	public HashMap<String, String> getSupportedMimeTypes() {
-		return rtfParser.SUPPORTED_MIME_TYPES;
-	}
+	public Set<String> supportedMimeTypes() {
+        return SUPPORTED_MIME_TYPES;
+    }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
 	public void reset() {
         // Nothing todo here at the moment
diff --git a/source/de/anomic/document/parser/sevenzipParser.java b/source/de/anomic/document/parser/sevenzipParser.java
index a4da103fa..2d4d2fd05 100644
--- a/source/de/anomic/document/parser/sevenzipParser.java
+++ b/source/de/anomic/document/parser/sevenzipParser.java
@@ -32,7 +32,9 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import SevenZip.ArchiveExtractCallback;
 import SevenZip.IInStream;
 import SevenZip.MyRandomAccessFile;
@@ -55,9 +57,11 @@ public class sevenzipParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */    
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>(); 
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z"); 
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("7z");
+        SUPPORTED_MIME_TYPES.add("application/x-7z-compressed"); 
     }
     
     public sevenzipParser() {
@@ -124,10 +128,14 @@ public class sevenzipParser extends AbstractParser implements Idiom {
         }
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
 
      // wrapper class to redirect output of standard ArchiveExtractCallback to serverLog
      // and parse the extracted content
diff --git a/source/de/anomic/document/parser/swfParser.java b/source/de/anomic/document/parser/swfParser.java
index 35caacab7..a2a60cc88 100644
--- a/source/de/anomic/document/parser/swfParser.java
+++ b/source/de/anomic/document/parser/swfParser.java
@@ -29,6 +29,9 @@ package de.anomic.document.parser;
 
 import java.io.InputStream;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import pt.tumba.parser.swf.SWF2HTML;
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
@@ -42,24 +45,27 @@ public class swfParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf");
-        SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf");
-        SUPPORTED_MIME_TYPES.put("application/futuresplash","swf");
-        SUPPORTED_MIME_TYPES.put("image/vnd.rn-realflash","swf");
+        SUPPORTED_EXTENSIONS.add("swf");
+        SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash");
+        SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash2-preview");
+        SUPPORTED_MIME_TYPES.add("application/futuresplash");
+        SUPPORTED_MIME_TYPES.add("image/vnd.rn-realflash");
     }
 
     public swfParser() {
         super("Adobe Flash Parser");
     }
 
-    /**
-     * returns a hashtable containing the mimetypes that are supported by this class
-     */
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
     /*
      * parses the source documents and returns a plasmaParserDocument containing
diff --git a/source/de/anomic/document/parser/tarParser.java b/source/de/anomic/document/parser/tarParser.java
index e7a8027ed..2b8ed3b34 100644
--- a/source/de/anomic/document/parser/tarParser.java
+++ b/source/de/anomic/document/parser/tarParser.java
@@ -34,8 +34,10 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.Set;
 import java.util.zip.GZIPInputStream;
 
 import com.ice.tar.TarEntry;
@@ -58,22 +60,28 @@ public class tarParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();  
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
-        SUPPORTED_MIME_TYPES.put("application/tar","tar");
-        SUPPORTED_MIME_TYPES.put("applicaton/x-gtar","tar");
-        SUPPORTED_MIME_TYPES.put("multipart/x-tar","tar");
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("tar");
+        SUPPORTED_MIME_TYPES.add("application/x-tar");
+        SUPPORTED_MIME_TYPES.add("application/tar");
+        SUPPORTED_MIME_TYPES.add("application/x-gtar");
+        SUPPORTED_MIME_TYPES.add("multipart/x-tar");
     }     
 
     public tarParser() {        
         super("Tape Archive File Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset, InputStream source) throws ParserException, InterruptedException {
         
         long docTextLength = 0;
diff --git a/source/de/anomic/document/parser/vcfParser.java b/source/de/anomic/document/parser/vcfParser.java
index f2ad16267..5967e0bbb 100644
--- a/source/de/anomic/document/parser/vcfParser.java
+++ b/source/de/anomic/document/parser/vcfParser.java
@@ -33,8 +33,10 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
+import java.util.Set;
 
 import de.anomic.crawler.HTTPLoader;
 import de.anomic.document.AbstractParser;
@@ -60,25 +62,31 @@ public class vcfParser extends AbstractParser implements Idiom {
      * 
      * TODO: support of x-mozilla-cpt and x-mozilla-html tags
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
-    static { 
-        SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
-        SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
-        SUPPORTED_MIME_TYPES.put("text/anytext","vcf");
-        SUPPORTED_MIME_TYPES.put("text/directory","vcf");
-        SUPPORTED_MIME_TYPES.put("application/x-versit","vcf");
-        SUPPORTED_MIME_TYPES.put("text/x-versit","vcf");
-        SUPPORTED_MIME_TYPES.put("text/x-vcalendar","vcf");
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("vcf");
+        SUPPORTED_MIME_TYPES.add("text/x-vcard");
+        SUPPORTED_MIME_TYPES.add("application/vcard");
+        SUPPORTED_MIME_TYPES.add("text/anytext");
+        SUPPORTED_MIME_TYPES.add("text/directory");
+        SUPPORTED_MIME_TYPES.add("application/x-versit");
+        SUPPORTED_MIME_TYPES.add("text/x-versit");
+        SUPPORTED_MIME_TYPES.add("text/x-vcalendar");
     }
     
     public vcfParser() {        
         super("vCard Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL url, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
         
         try {
diff --git a/source/de/anomic/document/parser/vsdParser.java b/source/de/anomic/document/parser/vsdParser.java
index 0bea160cc..e50604532 100644
--- a/source/de/anomic/document/parser/vsdParser.java
+++ b/source/de/anomic/document/parser/vsdParser.java
@@ -28,7 +28,9 @@
 package de.anomic.document.parser;
 
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
@@ -43,29 +45,32 @@ public class vsdParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        SUPPORTED_MIME_TYPES.put("application/visio","vsd");
-        SUPPORTED_MIME_TYPES.put("application/x-visio","vsd");
-        SUPPORTED_MIME_TYPES.put("application/vnd.visio","vsd");
-        SUPPORTED_MIME_TYPES.put("application/visio.drawing","vsd");
-        SUPPORTED_MIME_TYPES.put("application/vsd","vsd");
-        SUPPORTED_MIME_TYPES.put("application/x-vsd","vsd");
-        SUPPORTED_MIME_TYPES.put("image/x-vsd","vsd");
-        SUPPORTED_MIME_TYPES.put("zz-application/zz-winassoc-vsd","vsd");
+        SUPPORTED_EXTENSIONS.add("vsd");
+        SUPPORTED_MIME_TYPES.add("application/visio");
+        SUPPORTED_MIME_TYPES.add("application/x-visio");
+        SUPPORTED_MIME_TYPES.add("application/vnd.visio");
+        SUPPORTED_MIME_TYPES.add("application/visio.drawing");
+        SUPPORTED_MIME_TYPES.add("application/vsd");
+        SUPPORTED_MIME_TYPES.add("application/x-vsd");
+        SUPPORTED_MIME_TYPES.add("image/x-vsd");
+        SUPPORTED_MIME_TYPES.add("zz-application/zz-winassoc-vsd");
     }
 
     public vsdParser() {
         super("Microsoft Visio Parser");
     }
 
-    /**
-     * returns a hashtable containing the mimetypes that are supported by this class
-     */
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
-
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     /*
      * parses the source documents and returns a plasmaParserDocument containing
      * all extracted information about the parsed document
diff --git a/source/de/anomic/document/parser/xlsParser.java b/source/de/anomic/document/parser/xlsParser.java
index 0330677e8..2cdb32ed1 100644
--- a/source/de/anomic/document/parser/xlsParser.java
+++ b/source/de/anomic/document/parser/xlsParser.java
@@ -28,7 +28,9 @@
 package de.anomic.document.parser;
 
 import java.io.InputStream;
-import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
 import org.apache.poi.hssf.eventusermodel.HSSFListener;
 import org.apache.poi.hssf.eventusermodel.HSSFRequest;
@@ -56,17 +58,19 @@ public class xlsParser extends AbstractParser implements Idiom, HSSFListener {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
     static {
-        String ext = "xls,xlsx";
-        SUPPORTED_MIME_TYPES.put("application/msexcel",ext);
-        SUPPORTED_MIME_TYPES.put("application/excel",ext);
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-excel",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-excel",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-msexcel",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-ms-excel",ext);
-        SUPPORTED_MIME_TYPES.put("application/x-dos_ms_excel",ext);
-        SUPPORTED_MIME_TYPES.put("application/xls",ext);
+        SUPPORTED_EXTENSIONS.add("xls");
+        SUPPORTED_EXTENSIONS.add("xlsx");
+        SUPPORTED_MIME_TYPES.add("application/msexcel");
+        SUPPORTED_MIME_TYPES.add("application/excel");
+        SUPPORTED_MIME_TYPES.add("application/vnd.ms-excel");
+        SUPPORTED_MIME_TYPES.add("application/x-excel");
+        SUPPORTED_MIME_TYPES.add("application/x-msexcel");
+        SUPPORTED_MIME_TYPES.add("application/x-ms-excel");
+        SUPPORTED_MIME_TYPES.add("application/x-dos_ms_excel");
+        SUPPORTED_MIME_TYPES.add("application/xls");
     }     
 
     public xlsParser(){
@@ -134,9 +138,13 @@ public class xlsParser extends AbstractParser implements Idiom, HSSFListener {
         }
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
+    
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
 
     @Override
     public void reset(){
diff --git a/source/de/anomic/document/parser/zipParser.java b/source/de/anomic/document/parser/zipParser.java
index 29a2ac431..0a874e7e6 100644
--- a/source/de/anomic/document/parser/zipParser.java
+++ b/source/de/anomic/document/parser/zipParser.java
@@ -34,8 +34,10 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
@@ -56,26 +58,32 @@ public class zipParser extends AbstractParser implements Idiom {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>(); 
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/zip","zip");
-        SUPPORTED_MIME_TYPES.put("application/x-zip","zip");
-        SUPPORTED_MIME_TYPES.put("application/x-zip-compressed","zip");
-        SUPPORTED_MIME_TYPES.put("application/octet-stream","zip");
-        SUPPORTED_MIME_TYPES.put("application/x-compress","zip");
-        SUPPORTED_MIME_TYPES.put("application/x-compressed","zip");
-        SUPPORTED_MIME_TYPES.put("multipart/x-zip","zip");
-        SUPPORTED_MIME_TYPES.put("application/java-archive","jar");
+    public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
+    public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
+    static {
+        SUPPORTED_EXTENSIONS.add("zip");
+        SUPPORTED_EXTENSIONS.add("jar");
+        SUPPORTED_MIME_TYPES.add("application/zip");
+        SUPPORTED_MIME_TYPES.add("application/x-zip");
+        SUPPORTED_MIME_TYPES.add("application/x-zip-compressed");
+        SUPPORTED_MIME_TYPES.add("application/x-compress");
+        SUPPORTED_MIME_TYPES.add("application/x-compressed");
+        SUPPORTED_MIME_TYPES.add("multipart/x-zip");
+        SUPPORTED_MIME_TYPES.add("application/java-archive");
     }     
 
     public zipParser() {        
-        super("Compressed Archive File Parser"); 
+        super("ZIP File Parser"); 
     }
     
-    public HashMap<String, String> getSupportedMimeTypes() {
+    public Set<String> supportedMimeTypes() {
         return SUPPORTED_MIME_TYPES;
     }
     
+    public Set<String> supportedExtensions() {
+        return SUPPORTED_EXTENSIONS;
+    }
+    
     public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
         
         long docTextLength = 0;
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index e590182c2..33c793693 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -528,7 +528,7 @@ public final class httpdProxyHandler {
 
                     final String storeError = cacheEntry.shallStoreCacheForProxy();
                     final boolean storeHTCache = cacheEntry.profile().storeHTCache();
-                    final boolean isSupportedContent = Parser.supportsExtension(cacheEntry.url()) && Parser.supportsMime(cacheEntry.getMimeType());
+                    final String supportError = Parser.supports(cacheEntry.url(), cacheEntry.getMimeType()); // null if the content is supported, otherwise the reason why it is not
                     if (
                             /*
                              * Now we store the response into the htcache directory if
@@ -539,7 +539,7 @@ public final class httpdProxyHandler {
                              * b) the user has configured to use the htcache OR
                              * c) the content should be indexed
                              */
-                            ((storeHTCache) || (isSupportedContent))
+                            ((storeHTCache) || (supportError == null)) // supportError == null means the content is supported (indexable)
                     ) {
                         // we don't write actually into a file, only to RAM, and schedule writing the file.
                         int l = res.getResponseHeader().size();
@@ -580,7 +580,7 @@ public final class httpdProxyHandler {
                         if (theLogger.isFine()) theLogger.logFine(reqID +" "+ url.toString() + " not cached." +
                                 " StoreError=" + ((storeError==null)?"None":storeError) +
                                 " StoreHTCache=" + storeHTCache +
-                                " SupportetContent=" + isSupportedContent);
+                                " SupportError=" + supportError);
 
                         FileUtils.copy(res.getDataAsStream(), outStream);
 
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index 66c8bfd6f..8da80776c 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -42,7 +42,6 @@ import java.util.HashMap;
 import java.util.Map;
 
 import de.anomic.document.Classification;
-import de.anomic.document.Parser;
 import de.anomic.http.httpResponseHeader;
 import de.anomic.http.httpDocument;
 import de.anomic.kelondro.blob.ArrayStack;
@@ -181,10 +180,6 @@ public final class plasmaHTCache {
         return mimeType.toUpperCase().startsWith("IMAGE");
     }
 
-    public static boolean isText(final String mimeType) {
-        return Parser.supportsMime(mimeType);
-    }
-
     public static boolean noIndexingURL(final yacyURL url) {
         if (url == null) return false;
         String urlString = url.toString().toLowerCase();
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index d0d8ed50d..37fb4186c 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1087,8 +1087,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
          * 
          * Testing if the content type is supported by the available parsers
          * ========================================================================= */
-        final boolean isSupportedContent = Parser.supportsExtension(entry.url()) && Parser.supportsMime(entry.getMimeType());
-        if (log.isFinest()) log.logFinest("STORE "+ entry.url() +" content of type "+ entry.getMimeType() +" is supported: "+ isSupportedContent);
+        final String supportError = Parser.supports(entry.url(), entry.getMimeType());
+        if (log.isFinest()) log.logFinest("STORE "+ entry.url() +" content of type "+ entry.getMimeType() + " is supported: " + ((supportError == null) ? "true" : "false (" + supportError + ")")); // supportError is null when supported, else the reason
         
         /* =========================================================================
          * INDEX CONTROL HEADER
@@ -1121,7 +1121,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
          * a) the user has configured to use the htcache or
          * b) the content should be indexed
          * ========================================================================= */        
-        if (((entry.profile() != null) && (entry.profile().storeHTCache())) || (doIndexing && isSupportedContent)) {
+        if (((entry.profile() != null) && (entry.profile().storeHTCache())) || (doIndexing && supportError == null)) {
             // store response header
             /*
             if (entry.writeResourceInfo()) {
@@ -1146,7 +1146,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         /* =========================================================================
          * INDEXING
          * ========================================================================= */          
-        if (doIndexing && isSupportedContent) {
+        if (doIndexing && supportError == null) {
             
             // enqueue for further crawling
             enQueue(this.crawler.queuePreStack.newEntry(
diff --git a/source/de/anomic/search/SnippetCache.java b/source/de/anomic/search/SnippetCache.java
index 489ca74fd..f55820d05 100644
--- a/source/de/anomic/search/SnippetCache.java
+++ b/source/de/anomic/search/SnippetCache.java
@@ -865,17 +865,15 @@ public class SnippetCache {
             }
 
             // STEP 3: if the metadata is still null try to guess the mimeType of the resource
-            if (responseHeader == null) {
-                if (Parser.supportsExtension(url)) {
-                    String supposedMime = Parser.mimeOf(url);
-                    return Parser.parseSource(url, supposedMime, null, contentLength, resourceStream);
-                }
+            String supportError = Parser.supports(url, responseHeader == null ? null : responseHeader.mime());
+            if (supportError != null) {
+                log.logInfo("could not generate snippet for " + url.toNormalform(true, false) + ": " + supportError);
                 return null;
-            }            
-            if (Parser.supportsMime(responseHeader.mime())) {
-                return Parser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
             }
-            return null;
+            if (responseHeader == null) {
+                return Parser.parseSource(url, null, null, contentLength, resourceStream);
+            }
+            return Parser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
         } catch (final InterruptedException e) {
             // interruption of thread detected
             return null;