redesign of parser mime type detection and parser steering

There is now a mime-blacklist instead of a mime-whitelist.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6190 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · 57a88d435b
parent e15d27bc63
commit 57a88d435b
37 changed files with 321 additions and 526 deletions
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@ -247,23 +247,9 @@ releases = DATA/RELEASE
 minimumLocalDelta = 0
 minimumGlobalDelta = 500

-# the following mime-types are the whitelist for indexing
-#
-# parseableMime: specifies mime-types that can be indexed with any built-in parser
-parseableMimeTypes=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-
-# parseableMimeTypes.IMAGE: specifies mime-types that refer to image type content
-parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp
-
-# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
-parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
-
-# media extension string
-# a comma-separated list of extensions that denote media file formats
-# this is important to recognize <a href> - tags as not-html reference
-# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
-mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bmp,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,vsd,war,wav,wmv,xcf,xls,zip
-parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
+# the following mime-types are a blacklist for indexing:
+# parser.mime.deny: specifies mime-types that shall not be indexed
+parser.mime.deny=

 # Promotion Strings
 # These strings appear in the Web Mask of the YACY search client
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@ -29,15 +29,13 @@

 import java.net.InetSocketAddress;
 import java.net.SocketException;
-import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

-import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.http.httpRemoteProxyConfig;
 import de.anomic.http.httpd;
@ -46,6 +44,7 @@ import de.anomic.kelondro.order.Base64Order;
 import de.anomic.kelondro.order.Digest;
 import de.anomic.kelondro.util.DateFormatter;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaSwitchboardConstants;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@ -458,30 +457,16 @@ public class SettingsAck_p {
        if (post.containsKey("parserSettings")) {
            post.remove("parserSettings");
            
-            final HashSet<String> newConfig = new HashSet<String>();
-            
            // loop through all received settings
            final Iterator<String> keyEnum = post.keySet().iterator();
            while (keyEnum.hasNext()) {
                String key = keyEnum.next();
-                if (key.startsWith("mimename")) newConfig.add(post.get(key));
+                if (key.startsWith("mimename")) Parser.grantMime(key.substring(9), post.get(key).equals("on")); 
            }
            
-            int enabledMimesCount = 0;
-            final StringBuilder currEnabledMimesTxt = new StringBuilder();
-            final String[] enabledMimes = Classification.setEnabledParserList(newConfig);
-            Arrays.sort(enabledMimes);
-            
-            currEnabledMimesTxt.setLength(0);
-            for (int i=0; i < enabledMimes.length; i++) {
-                currEnabledMimesTxt.append(enabledMimes[i]).append(",");
-                prop.put("info_parser_" + enabledMimesCount + "_enabledMime", newConfig.toString());
-                enabledMimesCount++;
-            }
-            if (currEnabledMimesTxt.length() > 0) currEnabledMimesTxt.deleteCharAt(currEnabledMimesTxt.length()-1);  
-            env.setConfig("parseableMimeTypes", currEnabledMimesTxt.toString());
+            env.setConfig(plasmaSwitchboardConstants.PARSER_MIME_DENY, Parser.getDenyMime());
            
-            prop.put("info_parser",enabledMimesCount);
+            prop.put("info_parser", 0);
            prop.put("info", "18");
            return prop;
          
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@ -24,11 +24,9 @@
 // javac -classpath .:../Classes Settings_p.java
 // if the shell's current path is HTROOT

-import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Iterator;

-import de.anomic.document.Classification;
 import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
 import de.anomic.http.httpHeader;
@ -219,17 +217,15 @@ public final class Settings_p {
         */
        int parserIdx = 0;
        
-        final Iterator<Idiom> availableParserIter = Parser.availableParserList.values().iterator();
+        final Iterator<Idiom> availableParserIter = Parser.idioms().iterator();
        while (availableParserIter.hasNext()) {
            final Idiom parserInfo = availableParserIter.next();
            prop.put("parser_" + parserIdx + "_name", parserInfo.getName());
            
            int mimeIdx = 0;
-            final Enumeration<String> mimeTypeIter = parserInfo.getSupportedMimeTypes().keys();
-            while (mimeTypeIter.hasMoreElements()) {
-                final String mimeType = mimeTypeIter.nextElement();
+            for (String mimeType: parserInfo.getSupportedMimeTypes().keySet()) {
                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
-                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (Classification.supportedMimeTypesContains(mimeType)) ? 1 : 0);
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (Parser.supportsMime(mimeType)) ? 1 : 0);
                mimeIdx++;
            }
            prop.put("parser_" + parserIdx + "_mime", mimeIdx);
--- a/source/de/anomic/crawler/FTPLoader.java
+++ b/source/de/anomic/crawler/FTPLoader.java
@ -32,7 +32,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Date;

-import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.http.httpResponseHeader;
@ -218,15 +218,24 @@ public class FTPLoader {
    private httpDocument getFile(final ftpc ftpClient, final CrawlEntry entry) throws Exception {
        // determine the mimetype of the resource
        final yacyURL entryUrl = entry.url();
-        final String extension = Classification.getFileExt(entryUrl);
-        final String mimeType = Classification.getMimeTypeByFileExt(extension);
+        final String mimeType = Parser.mimeOf(entryUrl);
        final String path = getPath(entryUrl);

        // if the mimetype and file extension is supported we start to download
        // the file
        httpDocument htCache = null;
-        if (Classification.supportedContent(entryUrl, mimeType)) {
-            // aborting download if content is too long
+        if (!Parser.supportsExtension(entryUrl)) {
+            // reject the file if the extension of its URL is not supported
+            log.logInfo("REJECTED WRONG EXTENSION TYPE " + mimeType + " for URL " + entry.url().toString());
+            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "wrong extension");
+            throw new Exception("response has not the right extension type -> rejected");
+        } else if (!Parser.supportsMime(mimeType)) {
+            // reject the file if the mime type derived from its URL is not supported
+            log.logInfo("REJECTED WRONG MIME TYPE " + mimeType + " for URL " + entry.url().toString());
+            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "wrong mime type");
+            throw new Exception("response has not the right mime type -> rejected");
+        } else {
+            // abort the download if content is too long
            final int size = ftpClient.fileSize(path);
            if (size <= maxFileSize || maxFileSize == -1) {
                // timeout for download
@ -246,11 +255,6 @@ public class FTPLoader {
                sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "file size limit exceeded");
                throw new Exception("file size exceeds limit");
            }
-        } else {
-            // if the response has not the right file type then reject file
-            log.logInfo("REJECTED WRONG MIME/EXT TYPE " + mimeType + " for URL " + entry.url().toString());
-            sb.crawlQueues.errorURL.newEntry(entry, this.sb.peers.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension");
-            throw new Exception("response has not the right file type -> rejected");
        }
        return htCache;
    }
--- a/source/de/anomic/crawler/HTTPLoader.java
+++ b/source/de/anomic/crawler/HTTPLoader.java
@ -29,7 +29,7 @@ import java.io.IOException;
 import java.util.Date;

 import de.anomic.data.Blacklist;
-import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.http.httpClient;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpResponse;
@ -156,8 +156,15 @@ public final class HTTPLoader {
                    
                    // request has been placed and result has been returned. work off response
                    //try {
-                        if (Classification.supportedContent(entry.url(), res.getResponseHeader().mime())) {
-                            
+                        if (!Parser.supportsExtension(entry.url())) {
+                            // reject the response if the file extension of the URL is not supported
+                            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong extension");
+                            throw new IOException("REJECTED WRONG EXTENSION TYPE " + entry.url().getFileExtension()+ " for URL " + entry.url().toString());
+                        } else if (!Parser.supportsMime(res.getResponseHeader().mime())) {
+                            // reject the response if the mime type from the response header is not supported
+                            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong mime type");
+                            throw new IOException("REJECTED WRONG MIME TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
+                        } else {     
                            // get the content length and check if the length is allowed
                            long contentLength = res.getResponseHeader().getContentLength();
                            if (maxFileSize >= 0 && contentLength > maxFileSize) {
@ -177,10 +184,6 @@ public final class HTTPLoader {
                            }
                            
                            htCache.setCacheArray(responseBody);
-                        } else {
-                            // if the response has not the right file type then reject file
-                            sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, "wrong mime type or wrong extension");
-                            throw new IOException("REJECTED WRONG MIME/EXT TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
                        }
                        return htCache;
                        /*
--- a/source/de/anomic/document/AbstractParser.java
+++ b/source/de/anomic/document/AbstractParser.java
@ -54,7 +54,7 @@ public abstract class AbstractParser implements Idiom {
    /**
     * Parser name
     */
-    protected String parserName = this.getClass().getName();
+    private String parserName;
    
    /**
     * The source file file size in bytes if the source document was passed
@ -65,7 +65,7 @@ public abstract class AbstractParser implements Idiom {
    /**
     * The Constructor of this class.
     */
-	public AbstractParser() {
+	public AbstractParser(String name) {
 		super();
 	}
    
@ -125,10 +125,7 @@ public abstract class AbstractParser implements Idiom {
                        // XXX: workaround for relative paths within document
                        + file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1)
                        + "/" + file.getName());
-                final Document subdoc = Parser.parseSource(
-                        url,
-                        Classification.getMimeTypeByFileExt(files[i].substring(files[i].indexOf('.') + 1)),
-                        null, file);
+                final Document subdoc = Parser.parseSource(url, Parser.mimeOf(url), null, file);
                // TODO: change anchors back to use '#' after archive name
                doc.addSubDocument(subdoc);
                subdoc.close();
--- a/source/de/anomic/document/Classification.java
+++ b/source/de/anomic/document/Classification.java
@ -2,8 +2,6 @@
 // (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 // first published 09.07.2009 on http://yacy.net
 //
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
 // $LastChangedDate: 2009-03-20 16:44:59 +0100 (Fr, 20 Mrz 2009) $
 // $LastChangedRevision: 5736 $
 // $LastChangedBy: borg-0300 $
@ -30,38 +28,25 @@ import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
 import java.util.Properties;
 import java.util.Set;

-import de.anomic.yacy.yacyURL;
-import de.anomic.yacy.logging.Log;
-
 public class Classification {

-	public static final HashSet<String> supportedHTMLFileExt = new HashSet<String>();
-    public static final HashSet<String> supportedHTMLMimeTypes = new HashSet<String>();
-    
    private static final HashSet<String> mediaExtSet = new HashSet<String>();
    private static final HashSet<String> imageExtSet = new HashSet<String>();
    private static final HashSet<String> audioExtSet = new HashSet<String>();
    private static final HashSet<String> videoExtSet = new HashSet<String>();
    private static final HashSet<String> appsExtSet = new HashSet<String>();
-    private static final Properties mimeTypeLookupByFileExt = new Properties();
    
-    public final static HashSet<String> enabledParserList = new HashSet<String>();
-    private final static HashSet<String> supportedFileExt = new HashSet<String>();
+    private static final Properties ext2mime = new Properties();
    
    static {
    	// load a list of extensions from file
        BufferedInputStream bufferedIn = null;
        try {
-            mimeTypeLookupByFileExt.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("httpd.mime"))));
+            ext2mime.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("httpd.mime"))));
        } catch (final IOException e) {
            System.err.println("ERROR: httpd.mime not found in settings path");
        } finally {
@ -70,219 +55,46 @@ public class Classification {
            } catch (final Exception e) {}
        }
        
-        final String apps = "sit,hqx,img,dmg,exe,com,bat,sh,vbs,zip,jar";
-        final String audio = "mp2,mp3,ogg,aac,aif,aiff,wav";
-        final String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
-        final String image = "jpg,jpeg,jpe,gif,png,ico,bmp";
+        final String apps = "7z,ace,arc,arj,asf,asx,bat,bin,bkf,bz2,cab,com,css,dcm,deb,dll,dmg,exe,gho,ghs,gz,hqx,img,iso,jar,lha,rar,sh,sit,sitx,tar,tbz,tgz,tib,torrent,vbs,war,zip";
+        final String audio = "aac,aif,aiff,flac,m4a,m4p,mid,mp2,mp3,oga,ogg,ram,wav,wma";
+        final String video = "3g2,3gp,3gp2,3gpp,3gpp2,3ivx,asf,asx,avi,div,divx,dv,dvx,env,f4v,flv,hdmov,m1v,m4v,m-jpeg,moov,mov,movie,mp2v,mp4,mpe,mpeg,mpg,mpg4,mv4,ogm,ogv,qt,rm,rv,vid,swf,wmv";
+        final String image = "ai,bmp,cdr,cmx,emf,eps,gif,img,jpeg,jpg,mng,pct,pdd,pdn,pict,png,psb,psd,psp,tif,tiff,wmf";
        
-        imageExtSet.addAll(extString2extList(image)); // image formats
-        audioExtSet.addAll(extString2extList(audio)); // audio formats
-        videoExtSet.addAll(extString2extList(video)); // video formats
-        appsExtSet.addAll(extString2extList(apps)); // application formats
-
-        initMediaExt(extString2extList(apps + "," + // application container
-                "tar,gz,bz2,arj,zip,rar," + // archive formats
-                "ps,xls,ppt,asf," + // text formats without support
-                audio + "," + // audio formats
-                video + "," + // video formats
-                image // image formats
-        ));
+        addSet(imageExtSet, image); // image formats
+        addSet(audioExtSet, audio); // audio formats
+        addSet(videoExtSet, video); // video formats
+        addSet(appsExtSet, apps);   // application formats
+        addSet(mediaExtSet, apps + "," + audio + "," + video + "," + image); // all media formats
    }
    
-    public static List<String> extString2extList(final String extString) {
-        final LinkedList<String> extensions = new LinkedList<String>();
-        if ((extString == null) || (extString.length() == 0)) {
-            return extensions;
-        }
-        final String[] xs = extString.split(",");
-        for (int i = 0; i < xs.length; i++)
-            extensions.add(xs[i].toLowerCase().trim());
-        return extensions;
+    private static void addSet(Set<String> set, final String extString) {
+        if ((extString == null) || (extString.length() == 0)) return;
+        for (String s: extString.split(",")) set.add(s.toLowerCase().trim());
    }

-    public static void initMediaExt(final List<String> mediaExtList) {
-        mediaExtSet.addAll(mediaExtList);
-    }
-    
-    public static boolean mediaExtContains(String mediaExt) {
+    public static boolean isMediaExtension(String mediaExt) {
        if (mediaExt == null) return false;
-        mediaExt = mediaExt.trim().toLowerCase();
-
-        if (supportedHTMLFileExt.contains(mediaExt)) return false;
-
-        if (supportedFileExtContains(mediaExt)) return false;
-
-        return mediaExtSet.contains(mediaExt);
+        return mediaExtSet.contains(mediaExt.trim().toLowerCase());
    }

-    public static boolean imageExtContains(final String imageExt) {
+    public static boolean isImageExtension(final String imageExt) {
        if (imageExt == null) return false;
        return imageExtSet.contains(imageExt.trim().toLowerCase());
    }

-    public static boolean audioExtContains(final String audioExt) {
+    public static boolean isAudioExtension(final String audioExt) {
        if (audioExt == null) return false;
        return audioExtSet.contains(audioExt.trim().toLowerCase());
    }

-    public static boolean videoExtContains(final String videoExt) {
+    public static boolean isVideoExtension(final String videoExt) {
        if (videoExt == null) return false;
        return videoExtSet.contains(videoExt.trim().toLowerCase());
    }

-    public static boolean appsExtContains(final String appsExt) {
+    public static boolean isApplicationExtension(final String appsExt) {
        if (appsExt == null) return false;
        return appsExtSet.contains(appsExt.trim().toLowerCase());
    }
    
-    public static void initHTMLParsableMimeTypes(
-            final String htmlParsableMimeTypes) {
-        final LinkedList<String> mimeTypes = new LinkedList<String>();
-        if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
-            return;
-        }
-        final String[] realtimeParsableMimeTypeList = htmlParsableMimeTypes
-                .split(",");
-        for (int i = 0; i < realtimeParsableMimeTypeList.length; i++) {
-            mimeTypes.add(realtimeParsableMimeTypeList[i].toLowerCase().trim());
-        }
-        supportedHTMLMimeTypes.addAll(mimeTypes);
-    }
-
-    public static String normalizeMimeType(String mimeType) {
-        // if (mimeType == null) doMimeTypeAnalysis
-        if (mimeType == null) mimeType = "application/octet-stream";
-        mimeType = mimeType.trim().toLowerCase();
-
-        final int pos = mimeType.indexOf(';');
-        return ((pos < 0) ? mimeType : mimeType.substring(0, pos));
-    }
-
-    public static String getMimeTypeByFileExt(final String fileExt) {
-        return mimeTypeLookupByFileExt.getProperty(fileExt, "application/octet-stream");
-    }
-
-    public static void initSupportedHTMLFileExt(final List<String> supportedRealtimeFileExtList) {
-        supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
-    }
-
-    static boolean HTMLParsableMimeTypesContains(String mimeType) {
-        mimeType = normalizeMimeType(mimeType);
-        return supportedHTMLMimeTypes.contains(mimeType);
-    }
-    
-    public static boolean supportedContent(final yacyURL url, String mimeType) {
-        mimeType = Classification.normalizeMimeType(mimeType);
-        if (
-                mimeType.equals("text/html") ||
-                mimeType.equals("application/xhtml+xml") ||
-                mimeType.equals("text/plain")
-            ) {
-            return supportedMimeTypesContains(mimeType);
-        }
-        return supportedMimeTypesContains(mimeType) && supportedFileExt(url);
-    }        
-    
-    public static boolean supportedMimeTypesContains(String mimeType) {
-        mimeType = Classification.normalizeMimeType(mimeType);
-        
-        if (Classification.supportedHTMLMimeTypes.contains(mimeType)) return true;
-        return enabledParserList.contains(mimeType);
-    }        
-    
-    private static boolean supportedFileExt(final yacyURL url) {
-        if (url == null) throw new NullPointerException();
-        
-        // getting the file path
-        final String name = getFileExt(url);
-        return supportedFileExtContains(name);
-    }
-    
-    public static boolean supportedFileExtContains(String fileExt) {
-        if (fileExt == null) return false;        
-        fileExt = fileExt.trim().toLowerCase();
-        if (Classification.supportedHTMLFileExt.contains(fileExt)) return true;
-
-        return supportedFileExt.contains(fileExt);
-    }        
-    
-    public static void addParseableMimeTypes(final String enabledMimeTypes) {
-        HashSet<String> mimeTypes = null;
-        if ((enabledMimeTypes == null) || (enabledMimeTypes.length() == 0)) {
-            mimeTypes = new HashSet<String>();
-        } else {            
-            final String[] enabledMimeTypeList = enabledMimeTypes.split(",");
-            mimeTypes = new HashSet<String>(enabledMimeTypeList.length);
-            for (int i = 0; i < enabledMimeTypeList.length; i++) mimeTypes.add(enabledMimeTypeList[i].toLowerCase().trim());
-        }
-        setEnabledParserList(mimeTypes);
-    }
-    
-    public static void enableAllParsers() {
-        final Set<String> availableMimeTypes = Parser.availableParserList.keySet();
-        setEnabledParserList(availableMimeTypes);
-    }
-    
-    public static String[] setEnabledParserList(final Set<String> mimeTypeSet) {
-        
-        final HashSet<String> newEnabledParsers = new HashSet<String>();
-        final HashSet<String> newSupportedFileExt = new HashSet<String>();
-        
-        if (mimeTypeSet != null) {
-            final Iterator<String> mimeTypes = mimeTypeSet.iterator();
-            while (mimeTypes.hasNext()) {
-                final String mimeType = mimeTypes.next();
-                Idiom theParser = Parser.availableParserList.get(mimeType);
-                if (theParser != null) {
-                    try {
-                        // getting a list of mimeTypes that the parser supports
-                        final Hashtable<String, String> parserSupportsMimeTypes = theParser.getSupportedMimeTypes();
-                        if (parserSupportsMimeTypes != null) {
-                            final Object supportedExtensions = parserSupportsMimeTypes.get(mimeType);
-                            if ((supportedExtensions != null) &&
-                                    (supportedExtensions instanceof String) &&
-                                    (((String)supportedExtensions).length() > 0)) {
-                                final String[] extArray = ((String)supportedExtensions).split(",");
-                                newSupportedFileExt.addAll(Arrays.asList(extArray));
-                            }
-                        }
-                        newEnabledParsers.add(mimeType);
-                        
-                    } catch (final Exception e) {
-                        Log.logSevere("PARSER", "error in setEnabledParserList", e);
-                    } finally {
-                        if (theParser != null)
-                            theParser = null; // destroy object
-                    }
-                }
-            }
-        }
-        
-        enabledParserList.addAll(newEnabledParsers);
-        supportedFileExt.addAll(newSupportedFileExt);
-
-        return newEnabledParsers.toArray(new String[newEnabledParsers.size()]);
-    }
-    
-    @SuppressWarnings("unchecked")
-    public static HashSet<String> getEnabledParserList() {
-        return (HashSet<String>) enabledParserList.clone();
-    }
-    
-    public static String getFileExt(final yacyURL url) {
-        // getting the file path
-        String name = url.getPath();
-
-        // tetermining last position of / in the file path
-        int p = name.lastIndexOf('/');
-        if (p != -1) {
-            name = name.substring(p);
-        }
-
-        // termining last position of . in file path
-        p = name.lastIndexOf('.');
-        if (p < 0)
-            return "";
-        return name.substring(p + 1);
-    }
 }
--- a/source/de/anomic/document/Document.java
+++ b/source/de/anomic/document/Document.java
@ -374,14 +374,14 @@ dc_rights
                    } else {
                        ext = u.substring(extpos + 1).toLowerCase();
                    }
-                    if (Classification.mediaExtContains(ext)) {
+                    if (Classification.isMediaExtension(ext)) {
                        // this is not a normal anchor, its a media link
-                        if (Classification.imageExtContains(ext)) {
+                        if (Classification.isImageExtension(ext)) {
                            ContentScraper.addImage(collectedImages, new ImageEntry(url, entry.getValue(), -1, -1));
                        }
-                        else if (Classification.audioExtContains(ext)) audiolinks.put(url, entry.getValue());
-                        else if (Classification.videoExtContains(ext)) videolinks.put(url, entry.getValue());
-                        else if (Classification.appsExtContains(ext)) applinks.put(url, entry.getValue());
+                        else if (Classification.isAudioExtension(ext)) audiolinks.put(url, entry.getValue());
+                        else if (Classification.isVideoExtension(ext)) videolinks.put(url, entry.getValue());
+                        else if (Classification.isApplicationExtension(ext)) applinks.put(url, entry.getValue());
                    } else {
                        hyperlinks.put(url, entry.getValue());
                    }
--- a/source/de/anomic/document/Idiom.java
+++ b/source/de/anomic/document/Idiom.java
@ -27,6 +27,7 @@ package de.anomic.document;

 import java.io.File;
 import java.io.InputStream;
+import java.util.HashMap;
 import java.util.Hashtable;

 import de.anomic.yacy.yacyURL;
@ -85,11 +86,12 @@ public interface Idiom {
    throws ParserException, InterruptedException;
            
    /**
-     * Can be used to determine the MimeType(s) that are supported by the parser
-     * @return a {@link Hashtable} containing a list of MimeTypes that are supported by 
-     * the parser
+     * Get the MimeType(s) that are supported by the parser
+     * @return a {@link HashMap} containing a mapping from a mime type string
+     * to a comma-separated String of file extensions
+     * that are supported by the idiom parser
     */
-    public Hashtable<String, String> getSupportedMimeTypes();
+    public HashMap<String, String> getSupportedMimeTypes();
    
    /**
     * This function should be called before reusing the parser object.
--- a/source/de/anomic/document/Parser.java
+++ b/source/de/anomic/document/Parser.java
@ -31,9 +31,13 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Hashtable;
-import java.util.Iterator;
+import java.text.Collator;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;

 import de.anomic.document.parser.bzipParser;
 import de.anomic.document.parser.docParser;
@ -59,13 +63,24 @@ import de.anomic.yacy.logging.Log;

 public final class Parser {

-    private static final Log theLogger = new Log("PARSER");
-    public static final HashMap<String, Idiom> availableParserList = new HashMap<String, Idiom>();
+    private static final Log log = new Log("PARSER");
+    
+    // use a collator to relax the distinction between lowercase and uppercase letters
+    private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
+    static {
+        insensitiveCollator.setStrength(Collator.SECONDARY);
+        insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
+    }
+    
+    private static final Map<String, Idiom> mime2parser = new TreeMap<String, Idiom>(insensitiveCollator);
+    private static final Map<String, Set<String>> ext2mime = new TreeMap<String, Set<String>>(insensitiveCollator);
+    private static final Set<String> denyMime = new TreeSet<String>(insensitiveCollator);
    
    static {
        initParser(new bzipParser());
        initParser(new docParser());
        initParser(new gzipParser());
+        initParser(new htmlParser());
        initParser(new mimeTypeParser());
        initParser(new odtParser());
        initParser(new pdfParser());
@ -82,14 +97,30 @@ public final class Parser {
        initParser(new xlsParser());
        initParser(new zipParser());
    }
+    
+    /**
+     * Collects the parsers that are currently registered for at least one mime type.
+     * A parser that serves several mime types is contained only once, as far as
+     * the set semantics of {@link HashSet} apply to the parser objects.
+     * @return a new set with the values of the mime-to-parser mapping
+     */
+    public static Set<Idiom> idioms() {
+        // copy into a fresh set so callers never see the internal mapping
+        return new HashSet<Idiom>(mime2parser.values());
+    }
+
+    private static void initParser(Idiom parser) {
+        for (Map.Entry<String, String> e: parser.getSupportedMimeTypes().entrySet()) {
+            // process the mime types
+            final String mimeType = e.getKey();
+            Idiom p0 = mime2parser.get(mimeType);
+            if (p0 != null) log.logSevere("parser for mime '" + mimeType + "' was set to '" + p0.getName() + "', overwriting with new parser.");
+            mime2parser.put(mimeType, parser);
+            Log.logInfo("PARSER", "Parser for mime type '" + mimeType + "': " + parser.getName());

-    private static void initParser(Idiom theParser) {
-        final Hashtable<String, String> supportedMimeTypes = theParser.getSupportedMimeTypes();
-        final Iterator<String> mimeTypeIterator = supportedMimeTypes.keySet().iterator();
-        while (mimeTypeIterator.hasNext()) {
-            final String mimeType = mimeTypeIterator.next();
-            availableParserList.put(mimeType, theParser);
-            Log.logInfo("PARSER", "Found parser for mimeType '" + mimeType + "': " + theParser.getName());
+            // process the extensions
+            String[] exts = e.getValue().split(",");
+            for (String ext: exts) {
+                Set<String> s = ext2mime.get(ext);
+                if (s == null) s = new HashSet<String>();
+                s.add(mimeType);
+                ext2mime.put(ext, s);
+            }
        }
    }

@ -99,10 +130,10 @@ public final class Parser {
            ParserException {
        ByteArrayInputStream byteIn = null;
        try {
-            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from byte-array");
+            if (log.isFine()) log.logFine("Parsing '" + location + "' from byte-array");
            if (sourceArray == null || sourceArray.length == 0) {
                final String errorMsg = "No resource content available (1) " + (((sourceArray == null) ? "source == null" : "source.length() == 0") + ", url = " + location.toNormalform(true, false));
-                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
                throw new ParserException(errorMsg, location, errorMsg);
            }
            byteIn = new ByteArrayInputStream(sourceArray);
@ -110,7 +141,7 @@ public final class Parser {
        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof ParserException) throw (ParserException) e;
-            theLogger.logSevere("Unexpected exception in parseSource from byte-array: " + e.getMessage(), e);
+            log.logSevere("Unexpected exception in parseSource from byte-array: " + e.getMessage(), e);
            throw new ParserException("Unexpected exception while parsing " + location, location, e);
        } finally {
            if (byteIn != null) try {
@ -125,10 +156,10 @@ public final class Parser {

        BufferedInputStream sourceStream = null;
        try {
-            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from file");
+            if (log.isFine()) log.logFine("Parsing '" + location + "' from file");
            if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) {
                final String errorMsg = sourceFile.exists() ? "Empty resource file." : "No resource content available (2).";
-                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
                throw new ParserException(errorMsg, location, "document has no content");
            }
            sourceStream = new BufferedInputStream(new FileInputStream(sourceFile));
@ -136,7 +167,7 @@ public final class Parser {
        } catch (final Exception e) {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof ParserException) throw (ParserException) e;
-            theLogger.logSevere("Unexpected exception in parseSource from File: " + e.getMessage(), e);
+            log.logSevere("Unexpected exception in parseSource from File: " + e.getMessage(), e);
            throw new ParserException("Unexpected exception while parsing " + location, location, e);
        } finally {
            if (sourceStream != null)try {
@ -150,31 +181,34 @@ public final class Parser {
            final long contentLength, final InputStream sourceStream)
            throws InterruptedException, ParserException {
        try {
-            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from stream");
-            mimeType = Classification.normalizeMimeType(mimeType);
-            final String fileExt = Classification.getFileExt(location);
+            if (log.isFine()) log.logFine("Parsing '" + location + "' from stream");
+            mimeType = normalizeMimeType(mimeType);
+            final String fileExt = location.getFileExtension();
            final String documentCharset = htmlParser.patchCharsetEncoding(charset);
-            if (!Classification.supportedContent(location, mimeType)) {
-                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (1)";
-                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-                throw new ParserException(errorMsg, location, "wrong mime type or wrong extension");
+            if (!supportsMime(mimeType)) {
+                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "'";
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, "wrong mime type");
+            }
+            if (!supportsExtension(location)) {
+                final String errorMsg = "No parser available to parse extension of url path";
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, "wrong extension");
            }
-            if (theLogger.isFine()) theLogger.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
-            Idiom parser = availableParserList.get(Classification.normalizeMimeType(mimeType));
+            if (log.isFine()) log.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
+            Idiom parser = mime2parser.get(normalizeMimeType(mimeType));
            Document doc = null;
            if (parser != null) {
                parser.setContentLength(contentLength);
                doc = parser.parse(location, mimeType, documentCharset, sourceStream);
-            } else if (Classification.HTMLParsableMimeTypesContains(mimeType)) {
-                doc = new htmlParser().parse(location, mimeType, documentCharset, sourceStream);
            } else {
                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
-                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
                throw new ParserException(errorMsg, location, "wrong mime type or wrong extension");
            }
            if (doc == null) {
                final String errorMsg = "Unexpected error. Parser returned null.";
-                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
                throw new ParserException(errorMsg, location);
            }
            return doc;
@ -182,9 +216,50 @@ public final class Parser {
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof ParserException) throw (ParserException) e;
            final String errorMsg = "Unexpected exception. " + e.getMessage();
-            theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
+            log.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
            throw new ParserException(errorMsg, location, e);
        }
    }

+    /**
+     * Tells whether content of the given mime type can be handed to a parser.
+     * The mime type is normalized first (parameters after ';' are cut off, a
+     * null value is treated as "application/octet-stream"), so that a value like
+     * "text/html; charset=utf-8" is checked against the deny list in the same
+     * form that is used for the parser lookup.
+     * @param mimeType the mime type as delivered with the resource, may be null
+     * @return true if the normalized mime type is not denied and a parser is registered for it
+     */
+    public static boolean supportsMime(String mimeType) {
+        // normalize once; the raw value must not be used for the deny check because
+        // it may carry parameters or be null (the collator-backed set rejects null)
+        final String normalized = normalizeMimeType(mimeType);
+        return !denyMime.contains(normalized) && mime2parser.containsKey(normalized);
+    }
+    
+    /**
+     * Tells whether the file extension of the given url is known to any registered parser.
+     * @param url the url whose file extension is checked
+     * @return true if the url has no file extension or if the extension is registered
+     */
+    public static boolean supportsExtension(final yacyURL url) {
+        final String extension = url.getFileExtension();
+        // no extension may be anything; that's ok if the mime type is ok
+        return extension.length() == 0 || ext2mime.containsKey(extension);
+    }
+    
+    /**
+     * Looks up a mime type for the file extension of the given url.
+     * @param url the url whose file extension is evaluated
+     * @return the result of {@link #mimeOf(String)} for that extension, possibly null
+     */
+    public static String mimeOf(yacyURL url) {
+        final String extension = url.getFileExtension();
+        return mimeOf(extension);
+    }
+    
+    /**
+     * Looks up a mime type that was registered for the given file extension.
+     * If several mime types share the extension, the first one delivered by the
+     * iterator of the stored set is returned.
+     * @param ext the file extension
+     * @return a mime type registered for the extension, or null if the extension is unknown
+     */
+    public static String mimeOf(String ext) {
+        final Set<String> candidates = ext2mime.get(ext);
+        return (candidates == null) ? null : candidates.iterator().next();
+    }
+    
+    /**
+     * Reduces a mime type declaration to its bare type.
+     * @param mimeType the declared mime type, may be null and may carry parameters after a ';'
+     * @return "application/octet-stream" for null, otherwise the trimmed text before the first ';'
+     */
+    private static String normalizeMimeType(String mimeType) {
+        if (mimeType == null) return "application/octet-stream";
+        // everything from the first ';' on is a parameter list (e.g. the charset) and is dropped
+        final int semicolon = mimeType.indexOf(';');
+        final String bare = (semicolon >= 0) ? mimeType.substring(0, semicolon) : mimeType;
+        return bare.trim();
+    }
+    
+    /**
+     * Replaces the deny list with the entries of a comma-separated list.
+     * Entries are trimmed; empty entries (as produced by an empty list or by
+     * doubled commas) are skipped so that they do not end up in the deny list.
+     * @param denyList comma-separated mime types; null or an empty string clears the deny list
+     */
+    public static void setDenyMime(String denyList) {
+        denyMime.clear();
+        if (denyList == null) return;
+        for (String s: denyList.split(",")) {
+            final String mime = s.trim();
+            // "".split(",") yields one empty element, which must not become a deny entry
+            if (mime.length() > 0) denyMime.add(mime);
+        }
+    }
+    
+    /**
+     * Serializes the deny list into the format that {@link #setDenyMime(String)} accepts.
+     * @return the denied mime types joined with ',' in the iteration order of the
+     *         deny set; an empty string if nothing is denied
+     */
+    public static String getDenyMime() {
+        final StringBuilder joined = new StringBuilder();
+        boolean first = true;
+        for (String d: denyMime) {
+            // write the separator in front of every entry but the first one; this
+            // also works for an empty deny list, where nothing is written at all
+            if (!first) joined.append(',');
+            joined.append(d);
+            first = false;
+        }
+        return joined.toString();
+    }
+    
+    /**
+     * Switches a single mime type between granted and denied.
+     * @param mime the mime type to change
+     * @param grant true removes the mime type from the deny list, false adds it
+     */
+    public static void grantMime(String mime, boolean grant) {
+        if (!grant) {
+            denyMime.add(mime);
+            return;
+        }
+        denyMime.remove(mime);
+    }
 }
--- a/source/de/anomic/document/parser/bzipParser.java
+++ b/source/de/anomic/document/parser/bzipParser.java
@ -30,8 +30,7 @@ package de.anomic.document.parser;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import org.apache.tools.bzip2.CBZip2InputStream;

 import de.anomic.document.AbstractParser;
@ -48,7 +47,7 @@ public class bzipParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();    
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();    
    static final String fileExtensions = "bz2,tbz,tbz2";
    static { 
        SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);
@ -61,11 +60,10 @@ public class bzipParser extends AbstractParser implements Idiom {
    }
    
    public bzipParser() {        
-        super();
-        this.parserName = "Bzip 2 UNIX Compressed File Parser";
+        super("Bzip 2 UNIX Compressed File Parser");
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/docParser.java
+++ b/source/de/anomic/document/parser/docParser.java
@ -28,8 +28,7 @@
 package de.anomic.document.parser;

 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import org.textmining.extraction.TextExtractor;
 import org.textmining.extraction.word.WordTextExtractorFactory;

@ -45,22 +44,22 @@ public class docParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */    
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static {
-        SUPPORTED_MIME_TYPES.put("application/msword","doc");
-        SUPPORTED_MIME_TYPES.put("application/doc","doc");
-        SUPPORTED_MIME_TYPES.put("appl/text","doc");
-        SUPPORTED_MIME_TYPES.put("application/vnd.msword","doc");
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-word","doc");
-        SUPPORTED_MIME_TYPES.put("application/winword","doc");
-        SUPPORTED_MIME_TYPES.put("application/word","doc");
-        SUPPORTED_MIME_TYPES.put("application/x-msw6","doc");
-        SUPPORTED_MIME_TYPES.put("application/x-msword","doc");
+        String ext = "doc,docx";
+        SUPPORTED_MIME_TYPES.put("application/msword",ext);
+        SUPPORTED_MIME_TYPES.put("application/doc",ext);
+        SUPPORTED_MIME_TYPES.put("appl/text",ext);
+        SUPPORTED_MIME_TYPES.put("application/vnd.msword",ext);
+        SUPPORTED_MIME_TYPES.put("application/vnd.ms-word",ext);
+        SUPPORTED_MIME_TYPES.put("application/winword",ext);
+        SUPPORTED_MIME_TYPES.put("application/word",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-msw6",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-msword",ext);
    }
    
 	public docParser() {
-		super();
-        this.parserName = "Word Document Parser";
+		super("Word Document Parser");
 	}

 	public Document parse(final yacyURL location, final String mimeType, final String charset,
@ -103,7 +102,7 @@ public class docParser extends AbstractParser implements Idiom {
 		}        
 	}

-	public java.util.Hashtable<String, String> getSupportedMimeTypes() {
+	public HashMap<String, String> getSupportedMimeTypes() {
 		return docParser.SUPPORTED_MIME_TYPES;
 	}

--- a/source/de/anomic/document/parser/gzipParser.java
+++ b/source/de/anomic/document/parser/gzipParser.java
@ -30,7 +30,7 @@ package de.anomic.document.parser;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.InputStream;
-import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.zip.GZIPInputStream;

 import de.anomic.document.AbstractParser;
@ -47,27 +47,26 @@ public class gzipParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
-    static final String fileExtensions = "gz,tgz";
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    static final String ext = "gz,tgz";
    static { 
-        SUPPORTED_MIME_TYPES.put("application/x-gzip",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/gzip",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-gunzip",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/gzipped",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/gzip-compressed",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-compressed",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-compress",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("gzip/document",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/octet-stream",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-tar",fileExtensions);
+        SUPPORTED_MIME_TYPES.put("application/x-gzip",ext);
+        SUPPORTED_MIME_TYPES.put("application/gzip",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-gunzip",ext);
+        SUPPORTED_MIME_TYPES.put("application/gzipped",ext);
+        SUPPORTED_MIME_TYPES.put("application/gzip-compressed",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-compressed",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-compress",ext);
+        SUPPORTED_MIME_TYPES.put("gzip/document",ext);
+        SUPPORTED_MIME_TYPES.put("application/octet-stream",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-tar",ext);
    }     

    public gzipParser() {        
-        super();
-        this.parserName = "GNU Zip Compressed Archive Parser";
+        super("GNU Zip Compressed Archive Parser");
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/htmlParser.java
+++ b/source/de/anomic/document/parser/htmlParser.java
@ -31,8 +31,7 @@ import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Document;
 import de.anomic.document.Idiom;
@ -49,17 +48,17 @@ public class htmlParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();  
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/xhtml+xml","htm,html,xhtml,php,asp");
-        SUPPORTED_MIME_TYPES.put("text/html","htm,html,xhtml,php,asp");
-        SUPPORTED_MIME_TYPES.put("text/plain","htm,html,xhtml,php,asp,txt");
-        SUPPORTED_MIME_TYPES.put("text/sgml","htm,html,xhtml,php,asp,xml");
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();  
+    static {
+        String ext = "htm,html,shtml,xhtml,php,asp,aspx,txt,jsp";
+        SUPPORTED_MIME_TYPES.put("application/xhtml+xml", ext);
+        SUPPORTED_MIME_TYPES.put("text/html", ext);
+        SUPPORTED_MIME_TYPES.put("text/plain", ext);
+        SUPPORTED_MIME_TYPES.put("text/sgml",ext);
    }
    
    public htmlParser() {
-        super();
-        this.parserName = "streaming html parser"; 
+        super("streaming html parser"); 
    }
    
    @Override
@ -215,7 +214,7 @@ public class htmlParser extends AbstractParser implements Idiom {
    }

    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/mimeTypeParser.java
+++ b/source/de/anomic/document/parser/mimeTypeParser.java
@ -31,6 +31,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Hashtable;

 import net.sf.jmimemagic.Magic;
@ -54,14 +55,14 @@ public class mimeTypeParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */    
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();   
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();   
    static { 
        SUPPORTED_MIME_TYPES.put("text/xml","xml");
        SUPPORTED_MIME_TYPES.put("application/xml","xml"); 
-        SUPPORTED_MIME_TYPES.put("application/x-xml","xml");        
-        SUPPORTED_MIME_TYPES.put("application/octet-stream","");        
-        SUPPORTED_MIME_TYPES.put("application/x-compress","");
-        SUPPORTED_MIME_TYPES.put("application/x-compressed","");
+        SUPPORTED_MIME_TYPES.put("application/x-xml","xml");
+        SUPPORTED_MIME_TYPES.put("application/octet-stream","xml");    
+        SUPPORTED_MIME_TYPES.put("application/x-compress","xml");
+        SUPPORTED_MIME_TYPES.put("application/x-compressed","xml");
    } 
    
    /**
@ -71,8 +72,7 @@ public class mimeTypeParser extends AbstractParser implements Idiom {
    private static Hashtable<Thread, Integer> threadLoopDetection = new Hashtable<Thread, Integer>();
    
    public mimeTypeParser() {
-        super();
-        this.parserName = "MimeType Parser"; 
+        super("MimeType Parser"); 
    }
    
    @SuppressWarnings("unchecked")
@ -174,7 +174,7 @@ public class mimeTypeParser extends AbstractParser implements Idiom {
        
    }
    
-    public java.util.Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return mimeTypeParser.SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/odtParser.java
+++ b/source/de/anomic/document/parser/odtParser.java
@ -35,8 +35,8 @@ import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.Enumeration;
+import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Hashtable;
 import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
@ -63,18 +63,17 @@ public class odtParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static { 
        SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt");
        SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");
    }     

    public odtParser() {        
-        super();
-        this.parserName = "OASIS OpenDocument V2 Text Document Parser"; 
+        super("OASIS OpenDocument V2 Text Document Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/pdfParser.java
+++ b/source/de/anomic/document/parser/pdfParser.java
@ -33,8 +33,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import org.pdfbox.pdfparser.PDFParser;
 import org.pdfbox.pdmodel.PDDocument;
 import org.pdfbox.pdmodel.PDDocumentInformation;
@ -56,7 +55,7 @@ public class pdfParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static {
        SUPPORTED_MIME_TYPES.put("application/pdf","pdf");
        SUPPORTED_MIME_TYPES.put("application/x-pdf","pdf");
@ -67,11 +66,10 @@ public class pdfParser extends AbstractParser implements Idiom {
    }
    
    public pdfParser() {        
-        super();
-        this.parserName = "Acrobat Portable Document Parser"; 
+        super("Acrobat Portable Document Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/pptParser.java
+++ b/source/de/anomic/document/parser/pptParser.java
@ -29,8 +29,7 @@ package de.anomic.document.parser;

 import java.io.BufferedInputStream;
 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;

 import de.anomic.document.AbstractParser;
@ -45,22 +44,21 @@ public class pptParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
-    static final String fileExtensions = "ppt,pps";
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    static final String ext = "ppt,pps";
    static { 
-        SUPPORTED_MIME_TYPES.put("application/mspowerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/powerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-powerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/ms-powerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/mspowerpnt",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/vnd-mspowerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-powerpoint",fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-m",fileExtensions);
+        SUPPORTED_MIME_TYPES.put("application/mspowerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/powerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/vnd.ms-powerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/ms-powerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/mspowerpnt",ext);
+        SUPPORTED_MIME_TYPES.put("application/vnd-mspowerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-powerpoint",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-m",ext);
   }

    public pptParser(){
-        super();
-        this.parserName = "Microsoft Powerpoint Parser";
+        super("Microsoft Powerpoint Parser");
    }

    /*
@ -116,7 +114,7 @@ public class pptParser extends AbstractParser implements Idiom {
        }
    }

-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }

--- a/source/de/anomic/document/parser/psParser.java
+++ b/source/de/anomic/document/parser/psParser.java
@ -34,8 +34,7 @@ import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
@ -49,7 +48,7 @@ public class psParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();   
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();   
    static { 
        SUPPORTED_MIME_TYPES.put("application/ps","ps");
        SUPPORTED_MIME_TYPES.put("application/x-postscript","ps");
@ -62,8 +61,7 @@ public class psParser extends AbstractParser implements Idiom {
    private static String parserMode = "java";
    
    public psParser() {        
-        super();
-        this.parserName = "PostScript Document Parser"; 
+        super("PostScript Document Parser"); 
        if (!modeScanDone) synchronized (modeScan) {
        	if (testForPs2Ascii()) parserMode = "ps2ascii";
        	else parserMode = "java";
@ -71,7 +69,7 @@ public class psParser extends AbstractParser implements Idiom {
 		}
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/rpmParser.java
+++ b/source/de/anomic/document/parser/rpmParser.java
@ -31,8 +31,6 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.InputStream;
 import java.util.HashMap;
-import java.util.Hashtable;
-
 import com.jguild.jrpm.io.RPMFile;
 import com.jguild.jrpm.io.datatype.DataTypeIf;

@ -57,7 +55,7 @@ public class rpmParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();   
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();   
    static { 
        SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
        SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");    
@ -65,11 +63,10 @@ public class rpmParser extends AbstractParser implements Idiom {
    }
    
    public rpmParser() {        
-        super();
-        this.parserName = "rpm Parser"; 
+        super("rpm Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/rssParser.java
+++ b/source/de/anomic/document/parser/rssParser.java
@ -33,7 +33,6 @@ import java.io.InputStream;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.HashMap;
-import java.util.Hashtable;
 import java.util.LinkedList;
 import java.util.Map;

@ -59,7 +58,7 @@ public class rssParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */  
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static final String fileExtensions = "xml,rss,rdf";
    static {
        SUPPORTED_MIME_TYPES.put("text/rss",fileExtensions);
@ -69,8 +68,7 @@ public class rssParser extends AbstractParser implements Idiom {
    }
    
 	public rssParser() {
-		super();
-        this.parserName = "Rich Site Summary/Atom Feed Parser"; 
+		super("Rich Site Summary/Atom Feed Parser"); 
 	}

 	public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
@ -176,7 +174,7 @@ public class rssParser extends AbstractParser implements Idiom {
        }
 	}

-	public Hashtable<String, String> getSupportedMimeTypes() {
+	public HashMap<String, String> getSupportedMimeTypes() {
 		return SUPPORTED_MIME_TYPES;
 	}

--- a/source/de/anomic/document/parser/rtfParser.java
+++ b/source/de/anomic/document/parser/rtfParser.java
@ -28,8 +28,7 @@
 package de.anomic.document.parser;

 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import javax.swing.text.DefaultStyledDocument;
 import javax.swing.text.rtf.RTFEditorKit;

@ -45,7 +44,7 @@ public class rtfParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */    
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static { 
        SUPPORTED_MIME_TYPES.put("application/rtf","rtf"); 
        SUPPORTED_MIME_TYPES.put("text/rtf","rtf");
@ -57,8 +56,7 @@ public class rtfParser extends AbstractParser implements Idiom {
    } 

 	public rtfParser() {
-		super();
-        this.parserName = "Rich Text Format Parser";  
+		super("Rich Text Format Parser");  
 	}

 	public Document parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
@ -100,7 +98,7 @@ public class rtfParser extends AbstractParser implements Idiom {
 		}        
 	}

-	public Hashtable<String, String> getSupportedMimeTypes() {
+	public HashMap<String, String> getSupportedMimeTypes() {
 		return rtfParser.SUPPORTED_MIME_TYPES;
 	}

--- a/source/de/anomic/document/parser/sevenzipParser.java
+++ b/source/de/anomic/document/parser/sevenzipParser.java
@ -32,8 +32,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import SevenZip.ArchiveExtractCallback;
 import SevenZip.IInStream;
 import SevenZip.MyRandomAccessFile;
@ -41,7 +40,6 @@ import SevenZip.Archive.IInArchive;
 import SevenZip.Archive.SevenZipEntry;
 import SevenZip.Archive.SevenZip.Handler;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Classification;
 import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
@ -57,14 +55,13 @@ public class sevenzipParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */    
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>(); 
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>(); 
    static { 
        SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z"); 
    }
    
    public sevenzipParser() {
-        super();
-        super.parserName = "7zip Archive Parser";
+        super("7zip Archive Parser");
    }
    
    public Document parse(final yacyURL location, final String mimeType, final String charset,
@ -127,7 +124,7 @@ public class sevenzipParser extends AbstractParser implements Idiom {
        }
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
@ -190,7 +187,7 @@ public class sevenzipParser extends AbstractParser implements Idiom {
                     // workaround for relative links in file, normally '#' shall be used behind the location, see
                     // below for reversion of the effects
                     final yacyURL url = yacyURL.newURL(doc.dc_source(), this.prefix + "/" + super.filePath);
-                     final String mime = Classification.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
+                     final String mime = Parser.mimeOf(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
                     if (this.cfos.isFallback()) {
                         theDoc = Parser.parseSource(url, mime, null, this.cfos.getContentFile());
                     } else {
--- a/source/de/anomic/document/parser/swfParser.java
+++ b/source/de/anomic/document/parser/swfParser.java
@ -29,8 +29,6 @@ package de.anomic.document.parser;

 import java.io.InputStream;
 import java.util.HashMap;
-import java.util.Hashtable;
-
 import pt.tumba.parser.swf.SWF2HTML;
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
@ -44,7 +42,7 @@ public class swfParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static {
        SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash","swf");
        SUPPORTED_MIME_TYPES.put("application/x-shockwave-flash2-preview","swf");
@ -53,14 +51,13 @@ public class swfParser extends AbstractParser implements Idiom {
    }

    public swfParser() {
-        super();
-        this.parserName = "Adobe Flash Parser";
+        super("Adobe Flash Parser");
    }

    /**
     * returns a map containing the mime types that are supported by this class
     */
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }

--- a/source/de/anomic/document/parser/tarParser.java
+++ b/source/de/anomic/document/parser/tarParser.java
@ -34,7 +34,6 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Hashtable;
 import java.util.LinkedList;
 import java.util.Map;
 import java.util.zip.GZIPInputStream;
@ -43,7 +42,6 @@ import com.ice.tar.TarEntry;
 import com.ice.tar.TarInputStream;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Classification;
 import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
@ -60,7 +58,7 @@ public class tarParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();  
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();  
    static { 
        SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
        SUPPORTED_MIME_TYPES.put("application/tar","tar");
@ -71,11 +69,10 @@ public class tarParser extends AbstractParser implements Idiom {
    }     

    public tarParser() {        
-        super();
-        this.parserName = "Tape Archive File Parser"; 
+        super("Tape Archive File Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
@ -97,7 +94,7 @@ public class tarParser extends AbstractParser implements Idiom {
             * If the mimeType was not reported correctly by the webserver we
             * have to decompress it first
             */
-            final String ext = Classification.getFileExt(location).toLowerCase();
+            final String ext = location.getFileExtension().toLowerCase();
            if (ext.equals("gz") || ext.equals("tgz")) {
                source = new GZIPInputStream(source);
            }
@ -130,7 +127,7 @@ public class tarParser extends AbstractParser implements Idiom {
                final String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
                
                // trying to determine the mimeType per file extension   
-                final String entryMime = Classification.getMimeTypeByFileExt(entryExt);
+                final String entryMime = Parser.mimeOf(entryExt);
                
                // getting the entry content
                File subDocTempFile = null;
--- a/source/de/anomic/document/parser/vcfParser.java
+++ b/source/de/anomic/document/parser/vcfParser.java
@ -33,7 +33,6 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.MalformedURLException;
 import java.util.HashMap;
-import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.LinkedList;

@ -61,7 +60,7 @@ public class vcfParser extends AbstractParser implements Idiom {
     * 
     * TODO: support of x-mozilla-cpt and x-mozilla-html tags
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static { 
        SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
        SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
@ -73,11 +72,10 @@ public class vcfParser extends AbstractParser implements Idiom {
    }
    
    public vcfParser() {        
-        super();
-        this.parserName = "vCard Parser"; 
+        super("vCard Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
--- a/source/de/anomic/document/parser/vsdParser.java
+++ b/source/de/anomic/document/parser/vsdParser.java
@ -28,8 +28,7 @@
 package de.anomic.document.parser;

 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import de.anomic.document.AbstractParser;
 import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
@ -44,7 +43,7 @@ public class vsdParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
    static {
        SUPPORTED_MIME_TYPES.put("application/visio","vsd");
        SUPPORTED_MIME_TYPES.put("application/x-visio","vsd");
@ -57,14 +56,13 @@ public class vsdParser extends AbstractParser implements Idiom {
    }

    public vsdParser() {
-        super();
-        this.parserName = "Microsoft Visio Parser";
+        super("Microsoft Visio Parser");
    }

    /**
     * returns a map containing the mime types that are supported by this class
     */
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }

--- a/source/de/anomic/document/parser/xlsParser.java
+++ b/source/de/anomic/document/parser/xlsParser.java
@ -28,8 +28,7 @@
 package de.anomic.document.parser;

 import java.io.InputStream;
-import java.util.Hashtable;
-
+import java.util.HashMap;
 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
 import org.apache.poi.hssf.eventusermodel.HSSFListener;
 import org.apache.poi.hssf.eventusermodel.HSSFRequest;
@ -57,21 +56,21 @@ public class xlsParser extends AbstractParser implements Idiom, HSSFListener {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
-    static { 
-        SUPPORTED_MIME_TYPES.put("application/msexcel","xls");
-        SUPPORTED_MIME_TYPES.put("application/excel","xls");
-        SUPPORTED_MIME_TYPES.put("application/vnd.ms-excel","xls");
-        SUPPORTED_MIME_TYPES.put("application/x-excel","xls");
-        SUPPORTED_MIME_TYPES.put("application/x-msexcel","xls");
-        SUPPORTED_MIME_TYPES.put("application/x-ms-excel","xls");
-        SUPPORTED_MIME_TYPES.put("application/x-dos_ms_excel","xls");
-        SUPPORTED_MIME_TYPES.put("application/xls","xls");
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>();
+    static {
+        String ext = "xls,xlsx";
+        SUPPORTED_MIME_TYPES.put("application/msexcel",ext);
+        SUPPORTED_MIME_TYPES.put("application/excel",ext);
+        SUPPORTED_MIME_TYPES.put("application/vnd.ms-excel",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-excel",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-msexcel",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-ms-excel",ext);
+        SUPPORTED_MIME_TYPES.put("application/x-dos_ms_excel",ext);
+        SUPPORTED_MIME_TYPES.put("application/xls",ext);
    }     

    public xlsParser(){
-        super();
-        this.parserName = "Microsoft Excel Parser";
+        super("Microsoft Excel Parser");
    }

    /*
@ -135,7 +134,7 @@ public class xlsParser extends AbstractParser implements Idiom, HSSFListener {
        }
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }

--- a/source/de/anomic/document/parser/zipParser.java
+++ b/source/de/anomic/document/parser/zipParser.java
@ -34,14 +34,12 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Hashtable;
 import java.util.LinkedList;
 import java.util.Map;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Classification;
 import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
@ -58,7 +56,7 @@ public class zipParser extends AbstractParser implements Idiom {
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>(); 
+    public static final HashMap<String, String> SUPPORTED_MIME_TYPES = new HashMap<String, String>(); 
    static { 
        SUPPORTED_MIME_TYPES.put("application/zip","zip");
        SUPPORTED_MIME_TYPES.put("application/x-zip","zip");
@ -71,11 +69,10 @@ public class zipParser extends AbstractParser implements Idiom {
    }     

    public zipParser() {        
-        super();
-        this.parserName = "Compressed Archive File Parser"; 
+        super("Compressed Archive File Parser"); 
    }
    
-    public Hashtable<String, String> getSupportedMimeTypes() {
+    public HashMap<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }
    
@ -118,7 +115,7 @@ public class zipParser extends AbstractParser implements Idiom {
                final String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
                
                // trying to determine the mimeType per file extension   
-                final String entryMime = Classification.getMimeTypeByFileExt(entryExt);      
+                final String entryMime = Parser.mimeOf(entryExt);      
                
                // parsing the content
                File subDocTempFile = null;
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@ -231,7 +231,7 @@ public final class httpdFileHandler {
        }
        headers.put(httpHeader.SERVER, "AnomicHTTPD (www.anomic.de)");
        headers.put(httpHeader.DATE, DateFormatter.formatRFC1123(new Date()));
-        if(!(Classification.mediaExtContains(ext))){
+        if(!(Classification.isMediaExtension(ext))){
            headers.put(httpHeader.PRAGMA, "no-cache");         
        }
        return headers;
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@ -73,7 +73,7 @@ import java.util.zip.GZIPOutputStream;

 import de.anomic.crawler.HTTPLoader;
 import de.anomic.data.Blacklist;
-import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.document.parser.html.ContentTransformer;
 import de.anomic.document.parser.html.Transformer;
 import de.anomic.kelondro.util.DateFormatter;
@ -522,13 +522,13 @@ public final class httpdProxyHandler {
                        res.getStatusLine().substring(4), // status text
                        responseHeader);

-                if(hasBody(res.getStatusCode())) {
+                if (hasBody(res.getStatusCode())) {

                    final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);

                    final String storeError = cacheEntry.shallStoreCacheForProxy();
                    final boolean storeHTCache = cacheEntry.profile().storeHTCache();
-                    final boolean isSupportedContent = Classification.supportedContent(cacheEntry.url(), cacheEntry.getMimeType());
+                    final boolean isSupportedContent = Parser.supportsExtension(cacheEntry.url()) && Parser.supportsMime(cacheEntry.getMimeType());
                    if (
                            /*
                             * Now we store the response into the htcache directory if
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@ -42,6 +42,7 @@ import java.util.HashMap;
 import java.util.Map;

 import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.http.httpResponseHeader;
 import de.anomic.http.httpDocument;
 import de.anomic.kelondro.blob.ArrayStack;
@ -181,7 +182,7 @@ public final class plasmaHTCache {
    }

    public static boolean isText(final String mimeType) {
-        return Classification.supportedMimeTypesContains(mimeType);
+        return Parser.supportsMime(mimeType);
    }

    public static boolean noIndexingURL(final yacyURL url) {
@ -200,7 +201,7 @@ public final class plasmaHTCache {

        //php
        
-        return Classification.mediaExtContains(urlString);
+        return Classification.isMediaExtension(urlString);
    }


--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@ -144,7 +144,6 @@ import de.anomic.data.wiki.wikiBoard;
 import de.anomic.data.wiki.wikiCode;
 import de.anomic.data.wiki.wikiParser;
 import de.anomic.document.Condenser;
-import de.anomic.document.Classification;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Word;
@ -513,18 +512,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
        //Init bookmarks DB
        initBookmarks();
        
-        // make parser
-        log.logConfig("Starting Parser");
-        
-        // define an extension-blacklist
-        log.logConfig("Parser: Initializing Extension Mappings for Media/Parser");
-        Classification.initMediaExt(Classification.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT,"")));
-        Classification.initSupportedHTMLFileExt(Classification.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT_PARSEABLE,"")));
-        
        // define a realtime parsable mimetype list
-        log.logConfig("Parser: Initializing Mime Types");
-        Classification.initHTMLParsableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
-        Classification.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES, null));
+        log.logConfig("Parser: Initializing Mime Type deny list");
+        Parser.setDenyMime(getConfig(plasmaSwitchboardConstants.PARSER_MIME_DENY, null));
        
        // start a loader
        log.logConfig("Starting Crawl Loader");
@ -1098,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         * 
         * Testing if the content type is supported by the available parsers
         * ========================================================================= */
-        final boolean isSupportedContent = Classification.supportedContent(entry.url(),entry.getMimeType());
+        final boolean isSupportedContent = Parser.supportsExtension(entry.url()) && Parser.supportsMime(entry.getMimeType());
        if (log.isFinest()) log.logFinest("STORE "+ entry.url() +" content of type "+ entry.getMimeType() +" is supported: "+ isSupportedContent);
        
        /* =========================================================================
--- a/source/de/anomic/plasma/plasmaSwitchboardConstants.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardConstants.java
@ -244,11 +244,7 @@ public final class plasmaSwitchboardConstants {
    public static final String RANKING_DIST_1_METHOD            = "CRDist1Method";
    public static final String RANKING_DIST_1_PERCENT           = "CRDist1Percent";
    public static final String RANKING_DIST_1_TARGET            = "CRDist1Target";
-    public static final String PARSER_MIMETYPES                 = "parseableMimeTypes";
-    public static final String PARSER_MIMETYPES_HTML            = "parseableMimeTypes.HTML";
-    public static final String PARSER_MIMETYPES_IMAGE           = "parseableMimeTypes.IMAGE";
-    public static final String PARSER_MEDIA_EXT                 = "mediaExt";
-    public static final String PARSER_MEDIA_EXT_PARSEABLE       = "parseableExt";
+    public static final String PARSER_MIME_DENY                 = "parser.mime.deny";
    /**
     * <p><code>public static final String <strong>PROXY_ONLINE_CAUTION_DELAY</strong> = "onlineCautionDelay"</code></p>
     * <p>Name of the setting how long indexing should pause after the last time the proxy was used in milliseconds</p> 
--- a/source/de/anomic/search/SnippetCache.java
+++ b/source/de/anomic/search/SnippetCache.java
@ -39,7 +39,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import de.anomic.document.Condenser;
-import de.anomic.document.Classification;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Word;
@ -867,29 +866,13 @@ public class SnippetCache {

            // STEP 3: if the metadata is still null try to guess the mimeType of the resource
            if (responseHeader == null) {
-                final String filename = url.getFileName();
-                final int p = filename.lastIndexOf('.');
-                if (    // if no extension is available
-                        (p < 0) ||
-                        // or the extension is supported by one of the parsers
-                        ((p >= 0) && (Classification.supportedFileExtContains(filename.substring(p + 1))))
-                ) {
-                    String supposedMime = "text/html";
-
-                    // if the mimeType Parser is installed we can set the mimeType to null to force
-                    // a mimetype detection
-                    if (Classification.supportedMimeTypesContains("application/octet-stream")) {
-                        supposedMime = null;
-                    } else if (p != -1){
-                        // otherwise we try to determine the mimeType per file Extension
-                        supposedMime = Classification.getMimeTypeByFileExt(filename.substring(p + 1));
-                    }
-
+                if (Parser.supportsExtension(url)) {
+                    String supposedMime = Parser.mimeOf(url);
                    return Parser.parseSource(url, supposedMime, null, contentLength, resourceStream);
                }
                return null;
            }            
-            if (Classification.supportedMimeTypesContains(responseHeader.mime())) {
+            if (Parser.supportsMime(responseHeader.mime())) {
                return Parser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
            }
            return null;
--- a/source/de/anomic/tools/mediawikiIndex.java
+++ b/source/de/anomic/tools/mediawikiIndex.java
@ -58,7 +58,6 @@ import java.util.concurrent.TimeoutException;

 import de.anomic.data.wiki.wikiCode;
 import de.anomic.data.wiki.wikiParser;
-import de.anomic.document.Classification;
 import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
@ -102,9 +101,6 @@ public class mediawikiIndex extends Thread {
        this.wparser = new wikiCode(new URL(baseURL).getHost());
        this.count = 0;
        this.start = 0;
-        // must be called before usage:
-        Classification.initHTMLParsableMimeTypes("text/html");
-        Classification.addParseableMimeTypes("text/html");
    }
    
    /**
@ -146,8 +142,6 @@ public class mediawikiIndex extends Thread {
            StringBuilder sb = new StringBuilder();
            boolean page = false, text = false;
            String title = null;
-            Classification.initHTMLParsableMimeTypes("text/html");
-            Classification.addParseableMimeTypes("text/html");
            wikiparserrecord poison = newRecord();
            int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
            BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
--- a/source/de/anomic/yacy/yacyURL.java
+++ b/source/de/anomic/yacy/yacyURL.java
@ -528,6 +528,13 @@ public class yacyURL implements Serializable {
        return path.substring(p + 1); // the 'real' file name
    }

+    public String getFileExtension() {
+        String name = getFileName();
+        int p = name.lastIndexOf('.');
+        if (p < 0) return "";
+        return name.substring(p + 1);
+    }
+    
    public String getPath() {
        return path;
    }