Refactoring of ParserDispatcher and ParserConfig, resulting in the Idiom, Parser and Classification classes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6188 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · 21b8704fb4
parent 8ca1f5d400
commit 21b8704fb4
40 changed files with 849 additions and 1013 deletions
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@ -37,7 +37,7 @@ import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.http.httpRemoteProxyConfig;
 import de.anomic.http.httpd;
@ -469,7 +469,7 @@ public class SettingsAck_p {
            
            int enabledMimesCount = 0;
            final StringBuilder currEnabledMimesTxt = new StringBuilder();
-            final String[] enabledMimes = ParserDispatcher.setEnabledParserList(newConfig);
+            final String[] enabledMimes = Classification.setEnabledParserList(newConfig);
            Arrays.sort(enabledMimes);
            
            currEnabledMimesTxt.setLength(0);
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@ -28,8 +28,9 @@ import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Iterator;

+import de.anomic.document.Classification;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.plasma.plasmaSwitchboard;
@ -218,9 +219,9 @@ public final class Settings_p {
         */
        int parserIdx = 0;
        
-        final Iterator<Parser> availableParserIter = ParserDispatcher.availableParserList.values().iterator();
+        final Iterator<Idiom> availableParserIter = Parser.availableParserList.values().iterator();
        while (availableParserIter.hasNext()) {
-            final Parser parserInfo = availableParserIter.next();
+            final Idiom parserInfo = availableParserIter.next();
            prop.put("parser_" + parserIdx + "_name", parserInfo.getName());
            
            int mimeIdx = 0;
@ -228,7 +229,7 @@ public final class Settings_p {
            while (mimeTypeIter.hasMoreElements()) {
                final String mimeType = mimeTypeIter.nextElement();
                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
-                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (ParserDispatcher.supportedMimeTypesContains(mimeType)) ? 1 : 0);
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (Classification.supportedMimeTypesContains(mimeType)) ? 1 : 0);
                mimeIdx++;
            }
            prop.put("parser_" + parserIdx + "_mime", mimeIdx);
--- a/source/de/anomic/crawler/FTPLoader.java
+++ b/source/de/anomic/crawler/FTPLoader.java
@ -32,7 +32,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Date;

-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.http.httpResponseHeader;
@ -218,14 +218,14 @@ public class FTPLoader {
    private httpDocument getFile(final ftpc ftpClient, final CrawlEntry entry) throws Exception {
        // determine the mimetype of the resource
        final yacyURL entryUrl = entry.url();
-        final String extension = ParserDispatcher.getFileExt(entryUrl);
-        final String mimeType = ParserDispatcher.getMimeTypeByFileExt(extension);
+        final String extension = Classification.getFileExt(entryUrl);
+        final String mimeType = Classification.getMimeTypeByFileExt(extension);
        final String path = getPath(entryUrl);

        // if the mimetype and file extension is supported we start to download
        // the file
        httpDocument htCache = null;
-        if (ParserDispatcher.supportedContent(entryUrl, mimeType)) {
+        if (Classification.supportedContent(entryUrl, mimeType)) {
            // aborting download if content is too long
            final int size = ftpClient.fileSize(path);
            if (size <= maxFileSize || maxFileSize == -1) {
--- a/source/de/anomic/crawler/HTTPLoader.java
+++ b/source/de/anomic/crawler/HTTPLoader.java
@ -29,7 +29,7 @@ import java.io.IOException;
 import java.util.Date;

 import de.anomic.data.Blacklist;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.http.httpClient;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpResponse;
@ -156,7 +156,7 @@ public final class HTTPLoader {
                    
                    // request has been placed and result has been returned. work off response
                    //try {
-                        if (ParserDispatcher.supportedContent(entry.url(), res.getResponseHeader().mime())) {
+                        if (Classification.supportedContent(entry.url(), res.getResponseHeader().mime())) {
                            
                            // get the content length and check if the length is allowed
                            long contentLength = res.getResponseHeader().getContentLength();
--- a/source/de/anomic/document/AbstractParser.java
+++ b/source/de/anomic/document/AbstractParser.java
@ -38,12 +38,12 @@ import de.anomic.yacy.yacyURL;
 import de.anomic.yacy.logging.Log;

 /**
- * New classes implementing the {@link de.anomic.document.Parser} interface
+ * New classes implementing the {@link de.anomic.document.Idiom} interface
 * can extend this class to inherit all functions already implemented in this class.
 * @author Martin Thelian
 * @version $LastChangedRevision$ / $LastChangedDate$
 */
-public abstract class AbstractParser implements Parser {
+public abstract class AbstractParser implements Idiom {
    
    /**
     * the logger class that should be used by the parser module for logging
@ -125,9 +125,9 @@ public abstract class AbstractParser implements Parser {
                        // XXX: workaround for relative paths within document
                        + file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1)
                        + "/" + file.getName());
-                final Document subdoc = ParserDispatcher.parseSource(
+                final Document subdoc = Parser.parseSource(
                        url,
-                        ParserDispatcher.getMimeTypeByFileExt(files[i].substring(files[i].indexOf('.') + 1)),
+                        Classification.getMimeTypeByFileExt(files[i].substring(files[i].indexOf('.') + 1)),
                        null, file);
                // TODO: change anchors back to use '#' after archive name
                doc.addSubDocument(subdoc);
@ -150,7 +150,7 @@ public abstract class AbstractParser implements Parser {
     * and some additional metadata.
 	 * @throws ParserException if the content could not be parsed properly 
 	 * 
-	 * @see de.anomic.document.Parser#parse(de.anomic.net.URL, java.lang.String, byte[])
+	 * @see de.anomic.document.Idiom#parse(de.anomic.net.URL, java.lang.String, byte[])
 	 */
 	public Document parse(
            final yacyURL location, 
@ -185,7 +185,7 @@ public abstract class AbstractParser implements Parser {
     * and some additional metadata.
 	 * @throws ParserException if the content could not be parsed properly 
 	 * 
-	 * @see de.anomic.document.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.File)
+	 * @see de.anomic.document.Idiom#parse(de.anomic.net.URL, java.lang.String, java.io.File)
 	 */
 	public Document parse(
            final yacyURL location, 
@ -220,7 +220,7 @@ public abstract class AbstractParser implements Parser {
     * and some additional metadata.
     * @throws ParserException if the content could not be parsed properly 
     * 
-     * @see de.anomic.document.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream)
+     * @see de.anomic.document.Idiom#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream)
     */
    public abstract Document parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
    
--- a/source/de/anomic/document/Classification.java
+++ b/source/de/anomic/document/Classification.java
@ -0,0 +1,288 @@
+// Classification.java
+// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 09.07.2009 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2009-03-20 16:44:59 +0100 (Fr, 20 Mrz 2009) $
+// $LastChangedRevision: 5736 $
+// $LastChangedBy: borg-0300 $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+package de.anomic.document;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+import de.anomic.yacy.yacyURL;
+import de.anomic.yacy.logging.Log;
+
+public class Classification {
+
+	public static final HashSet<String> supportedHTMLFileExt = new HashSet<String>();
+    public static final HashSet<String> supportedHTMLMimeTypes = new HashSet<String>();
+    
+    private static final HashSet<String> mediaExtSet = new HashSet<String>();
+    private static final HashSet<String> imageExtSet = new HashSet<String>();
+    private static final HashSet<String> audioExtSet = new HashSet<String>();
+    private static final HashSet<String> videoExtSet = new HashSet<String>();
+    private static final HashSet<String> appsExtSet = new HashSet<String>();
+    private static final Properties mimeTypeLookupByFileExt = new Properties();
+    
+    public final static HashSet<String> enabledParserList = new HashSet<String>();
+    private final static HashSet<String> supportedFileExt = new HashSet<String>();
+    
+    static {
+        // Fill the extension -> mime type table from the properties file "httpd.mime".
+        // The relative path is resolved against the current working directory.
+        BufferedInputStream mimeStream = null;
+        try {
+            mimeStream = new BufferedInputStream(new FileInputStream(new File("httpd.mime")));
+            mimeTypeLookupByFileExt.load(mimeStream);
+        } catch (final IOException e) {
+            System.err.println("ERROR: httpd.mime not found in settings path");
+        } finally {
+            if (mimeStream != null) {
+                try {
+                    mimeStream.close();
+                } catch (final Exception e) {
+                    // best effort: nothing can be done if closing fails
+                }
+            }
+        }
+
+        // comma-separated extension lists, one per media category
+        final String image = "jpg,jpeg,jpe,gif,png,ico,bmp";
+        final String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
+        final String audio = "mp2,mp3,ogg,aac,aif,aiff,wav";
+        final String apps = "sit,hqx,img,dmg,exe,com,bat,sh,vbs,zip,jar";
+
+        appsExtSet.addAll(extString2extList(apps));
+        videoExtSet.addAll(extString2extList(video));
+        audioExtSet.addAll(extString2extList(audio));
+        imageExtSet.addAll(extString2extList(image));
+
+        // the general media set is the union of all categories plus
+        // archive formats and text formats that have no parser support
+        final String allMedia = apps + ","
+                + "tar,gz,bz2,arj,zip,rar,"
+                + "ps,xls,ppt,asf,"
+                + audio + ","
+                + video + ","
+                + image;
+        initMediaExt(extString2extList(allMedia));
+    }
+    
+    /**
+     * Splits a comma-separated extension string into a list of normalized tokens.
+     * Each token is lower-cased and trimmed; the order of the input is kept.
+     *
+     * @param extString comma-separated extensions, may be <code>null</code> or empty
+     * @return a new mutable list; empty if <code>extString</code> is <code>null</code> or empty
+     */
+    public static List<String> extString2extList(final String extString) {
+        final LinkedList<String> result = new LinkedList<String>();
+        if (extString != null && extString.length() > 0) {
+            for (final String token : extString.split(",")) {
+                result.add(token.toLowerCase().trim());
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Adds the given extensions to the set of known media file extensions.
+     * Existing entries are kept; the entries are stored exactly as passed in.
+     *
+     * @param mediaExtList extensions to register as media extensions
+     */
+    public static void initMediaExt(final List<String> mediaExtList) {
+        mediaExtSet.addAll(mediaExtList);
+    }
+    
+    /**
+     * Tells whether a file extension denotes media content that no parser handles.
+     * An extension that is registered as HTML extension or as extension of an
+     * enabled parser is never reported as media, even if it is in the media set.
+     *
+     * @param mediaExt the extension to test (case and surrounding blanks are ignored), may be <code>null</code>
+     * @return <code>true</code> if the extension is a media extension without parser support
+     */
+    public static boolean mediaExtContains(String mediaExt) {
+        if (mediaExt == null) {
+            return false;
+        }
+        final String ext = mediaExt.trim().toLowerCase();
+        final boolean parseable = supportedHTMLFileExt.contains(ext) || supportedFileExtContains(ext);
+        return !parseable && mediaExtSet.contains(ext);
+    }
+
+    /**
+     * @param imageExt a file extension, may be <code>null</code>
+     * @return <code>true</code> if the trimmed, lower-cased extension is a known image extension
+     */
+    public static boolean imageExtContains(final String imageExt) {
+        return imageExt != null && imageExtSet.contains(imageExt.trim().toLowerCase());
+    }
+
+    /**
+     * @param audioExt a file extension, may be <code>null</code>
+     * @return <code>true</code> if the trimmed, lower-cased extension is a known audio extension
+     */
+    public static boolean audioExtContains(final String audioExt) {
+        return audioExt != null && audioExtSet.contains(audioExt.trim().toLowerCase());
+    }
+
+    /**
+     * @param videoExt a file extension, may be <code>null</code>
+     * @return <code>true</code> if the trimmed, lower-cased extension is a known video extension
+     */
+    public static boolean videoExtContains(final String videoExt) {
+        return videoExt != null && videoExtSet.contains(videoExt.trim().toLowerCase());
+    }
+
+    /**
+     * @param appsExt a file extension, may be <code>null</code>
+     * @return <code>true</code> if the trimmed, lower-cased extension is a known application extension
+     */
+    public static boolean appsExtContains(final String appsExt) {
+        return appsExt != null && appsExtSet.contains(appsExt.trim().toLowerCase());
+    }
+    
+    /**
+     * Registers additional mime types that are handled by the HTML parser.
+     * The tokens are lower-cased and trimmed before they are added; a
+     * <code>null</code> or empty argument leaves the set unchanged.
+     *
+     * @param htmlParsableMimeTypes comma-separated list of mime types, may be <code>null</code>
+     */
+    public static void initHTMLParsableMimeTypes(
+            final String htmlParsableMimeTypes) {
+        // extString2extList performs the same split / lower-case / trim steps
+        // and yields an empty list for null or empty input
+        supportedHTMLMimeTypes.addAll(extString2extList(htmlParsableMimeTypes));
+    }
+
+    /**
+     * Reduces a mime type (or a complete Content-Type value) to its bare, lower-case
+     * <code>type/subtype</code> form: parameters after the first ';' are cut off and
+     * surrounding blanks are removed.
+     *
+     * @param mimeType the mime type to normalize, may be <code>null</code>
+     * @return the normalized mime type; <code>"application/octet-stream"</code> if
+     *         <code>mimeType</code> is <code>null</code>
+     */
+    public static String normalizeMimeType(String mimeType) {
+        // if (mimeType == null) doMimeTypeAnalysis
+        if (mimeType == null) return "application/octet-stream";
+
+        // cut off the parameters first and trim afterwards: a value like
+        // "text/html ; charset=UTF-8" would otherwise keep the blank in front of the ';'
+        final int pos = mimeType.indexOf(';');
+        if (pos >= 0) mimeType = mimeType.substring(0, pos);
+        return mimeType.trim().toLowerCase();
+    }
+
+    /**
+     * Looks up the mime type that is registered for a file extension in the
+     * table loaded from httpd.mime.
+     *
+     * @param fileExt the file extension without leading dot, may be <code>null</code>
+     * @return the registered mime type, or <code>"application/octet-stream"</code> if the
+     *         extension is <code>null</code> or unknown
+     */
+    public static String getMimeTypeByFileExt(final String fileExt) {
+        // Properties rejects null keys with a NullPointerException
+        if (fileExt == null) return "application/octet-stream";
+
+        // an exact match always wins, so existing lookups resolve as before
+        final String exact = mimeTypeLookupByFileExt.getProperty(fileExt);
+        if (exact != null) return exact;
+
+        // callers pass extensions taken verbatim from URLs and file names, so retry
+        // with the trimmed lower-case form (assumes lower-case keys in httpd.mime - TODO confirm)
+        return mimeTypeLookupByFileExt.getProperty(fileExt.trim().toLowerCase(), "application/octet-stream");
+    }
+
+    /**
+     * Adds the given extensions to the set of file extensions that are treated as HTML.
+     * Existing entries are kept; the entries are stored exactly as passed in.
+     *
+     * @param supportedRealtimeFileExtList extensions to register
+     */
+    public static void initSupportedHTMLFileExt(final List<String> supportedRealtimeFileExtList) {
+        supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
+    }
+
+    /**
+     * @param mimeType a mime type, normalized before the lookup; may be <code>null</code>
+     * @return <code>true</code> if the normalized mime type is registered as HTML mime type
+     */
+    static boolean HTMLParsableMimeTypesContains(String mimeType) {
+        return supportedHTMLMimeTypes.contains(normalizeMimeType(mimeType));
+    }
+    
+    /**
+     * Decides whether a resource can be parsed. The mime type must be supported in
+     * any case. For every mime type other than text/html, application/xhtml+xml and
+     * text/plain, the file extension of the url must be supported as well.
+     *
+     * @param url the location of the resource; only evaluated when the extension check is needed
+     * @param mimeType the mime type of the resource, may be <code>null</code>
+     * @return <code>true</code> if the content is supported
+     */
+    public static boolean supportedContent(final yacyURL url, String mimeType) {
+        final String mime = Classification.normalizeMimeType(mimeType);
+        if (!supportedMimeTypesContains(mime)) {
+            return false;
+        }
+        final boolean plainOrMarkup =
+                mime.equals("text/html") ||
+                mime.equals("application/xhtml+xml") ||
+                mime.equals("text/plain");
+        return plainOrMarkup || supportedFileExt(url);
+    }
+    
+    /**
+     * @param mimeType a mime type, normalized before the lookup; may be <code>null</code>
+     * @return <code>true</code> if the mime type is an HTML mime type or belongs to an enabled parser
+     */
+    public static boolean supportedMimeTypesContains(String mimeType) {
+        final String mime = Classification.normalizeMimeType(mimeType);
+        return Classification.supportedHTMLMimeTypes.contains(mime) || enabledParserList.contains(mime);
+    }
+    
+    /**
+     * Checks whether the file extension of a url is supported.
+     *
+     * @param url the url to inspect, must not be <code>null</code>
+     * @return <code>true</code> if the extension of the url path is supported
+     * @throws NullPointerException if <code>url</code> is <code>null</code>
+     */
+    private static boolean supportedFileExt(final yacyURL url) {
+        if (url == null) throw new NullPointerException();
+        return supportedFileExtContains(getFileExt(url));
+    }
+    
+    /**
+     * @param fileExt a file extension (case and surrounding blanks are ignored), may be <code>null</code>
+     * @return <code>true</code> if the extension is an HTML extension or belongs to an enabled parser
+     */
+    public static boolean supportedFileExtContains(String fileExt) {
+        if (fileExt == null) {
+            return false;
+        }
+        final String ext = fileExt.trim().toLowerCase();
+        return Classification.supportedHTMLFileExt.contains(ext) || supportedFileExt.contains(ext);
+    }
+    
+    /**
+     * Enables the parsers for a comma-separated list of mime types. The tokens are
+     * lower-cased and trimmed, then handed to {@link #setEnabledParserList(Set)}.
+     *
+     * @param enabledMimeTypes comma-separated mime types; <code>null</code> or empty enables nothing
+     */
+    public static void addParseableMimeTypes(final String enabledMimeTypes) {
+        // extString2extList yields the same normalized tokens (and an empty list for null/empty input)
+        final HashSet<String> requested = new HashSet<String>(extString2extList(enabledMimeTypes));
+        setEnabledParserList(requested);
+    }
+    
+    /**
+     * Enables the parser of every mime type that is a key of <code>Parser.availableParserList</code>.
+     */
+    public static void enableAllParsers() {
+        setEnabledParserList(Parser.availableParserList.keySet());
+    }
+    
+    /**
+     * Enables the parsers for the given mime types and registers the file
+     * extensions those parsers declare for them.
+     * <p>
+     * Mime types without an entry in <code>Parser.availableParserList</code> are ignored.
+     * The declared extensions are stored trimmed and lower-cased, because
+     * {@link #supportedFileExtContains(String)} normalizes the queried extension the
+     * same way and would otherwise never match them.
+     * <p>
+     * NOTE(review): the result is <em>added</em> to the already enabled mime types and
+     * extensions; nothing is ever removed, so calling this with a smaller set does not
+     * disable a parser. Confirm whether callers expect replacement.
+     *
+     * @param mimeTypeSet the mime types to enable, may be <code>null</code>
+     * @return the mime types of this call for which a parser was found
+     */
+    public static String[] setEnabledParserList(final Set<String> mimeTypeSet) {
+        final HashSet<String> newEnabledParsers = new HashSet<String>();
+        final HashSet<String> newSupportedFileExt = new HashSet<String>();
+
+        if (mimeTypeSet != null) {
+            for (final String mimeType : mimeTypeSet) {
+                final Idiom theParser = Parser.availableParserList.get(mimeType);
+                if (theParser == null) continue; // no parser available for this mime type
+
+                try {
+                    // getting a list of mimeTypes that the parser supports
+                    final Hashtable<String, String> parserSupportsMimeTypes = theParser.getSupportedMimeTypes();
+                    final Object supportedExtensions =
+                            (parserSupportsMimeTypes == null) ? null : parserSupportsMimeTypes.get(mimeType);
+                    if (supportedExtensions instanceof String) {
+                        // extString2extList splits at ',' and normalizes each extension;
+                        // an empty string contributes nothing
+                        newSupportedFileExt.addAll(extString2extList((String) supportedExtensions));
+                    }
+                    newEnabledParsers.add(mimeType);
+                } catch (final Exception e) {
+                    // a misbehaving parser must not prevent the remaining ones from being enabled
+                    Log.logSevere("PARSER", "error in setEnabledParserList", e);
+                }
+            }
+        }
+
+        enabledParserList.addAll(newEnabledParsers);
+        supportedFileExt.addAll(newSupportedFileExt);
+
+        return newEnabledParsers.toArray(new String[newEnabledParsers.size()]);
+    }
+    
+    /**
+     * @return a copy of the set of enabled mime types; changes to the copy
+     *         do not affect the configuration
+     */
+    public static HashSet<String> getEnabledParserList() {
+        return new HashSet<String>(enabledParserList);
+    }
+    
+    /**
+     * Extracts the file extension from the path of a url.
+     *
+     * @param url the url to inspect
+     * @return the text after the last '.' of the last path segment,
+     *         or an empty string if that segment contains no '.'
+     */
+    public static String getFileExt(final yacyURL url) {
+        final String path = url.getPath();
+
+        // keep only the last path segment (including its leading '/')
+        final int slash = path.lastIndexOf('/');
+        final String fileName = (slash == -1) ? path : path.substring(slash);
+
+        // everything behind the last '.' is the extension
+        final int dot = fileName.lastIndexOf('.');
+        return (dot < 0) ? "" : fileName.substring(dot + 1);
+    }
+}
--- a/source/de/anomic/document/Condenser.java
+++ b/source/de/anomic/document/Condenser.java
@ -33,7 +33,6 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.RandomAccessFile;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.util.Enumeration;
@ -295,33 +294,14 @@ public final class Condenser {
        int idx;
        int wordInSentenceCounter = 1;
        boolean comb_indexof = false, last_last = false, last_index = false;
-        RandomAccessFile fa;
-        final boolean dumpWords = false;
        final HashMap<StringBuilder, Phrase> sentences = new HashMap<StringBuilder, Phrase>();
        
-        if (dumpWords) try {
-            fa = new RandomAccessFile(new File("dump.txt"), "rw");
-            fa.seek(fa.length());
-        } catch (final IOException e) {
-            e.printStackTrace();
-            fa = null;
-        }
-        
        // read source
        final sievedWordsEnum wordenum = new sievedWordsEnum(is);
        while (wordenum.hasMoreElements()) {
            word = (new String(wordenum.nextElement())).toLowerCase(Locale.ENGLISH); // TODO: does toLowerCase work for non ISO-8859-1 chars?
            if (languageIdentificator != null) languageIdentificator.add(word);
            if (word.length() < wordminsize) continue;
-            //System.out.println("PARSED-WORD " + word);
-            
-            //This is useful for testing what YaCy "sees" of a website.
-            if (dumpWords && fa != null) try {
-				fa.writeBytes(word);
-				fa.write(160);
-			} catch (final IOException e) {
-				e.printStackTrace();
-			}
            
            // distinguish punctuation and words
            wordlen = word.length();
@ -397,15 +377,6 @@ public final class Condenser {
                sentences.put(sentence, new Phrase(sentenceHandleCount++));
            }
        }
-        
-        if (dumpWords && fa != null) try {
-            fa.write('\n');
-            fa.close();
-        } catch (final IOException e) {
-            e.printStackTrace();
-        }
-
-        // -------------------

        // we reconstruct the sentence hashtable
        // and order the entries by the number of the sentence
--- a/source/de/anomic/document/Document.java
+++ b/source/de/anomic/document/Document.java
@ -30,9 +30,12 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
+import java.net.MalformedURLException;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@ -97,7 +100,7 @@ public class Document {
        this.languages = languages;
        
        if (text == null) try {
-            this.text = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE);
+            this.text = new serverCachedFileOutputStream(Idiom.MAX_KEEP_IN_MEMORY_SIZE);
        } catch (final IOException e) {
            e.printStackTrace();
            this.text = new StringBuilder();
@ -371,14 +374,14 @@ dc_rights
                    } else {
                        ext = u.substring(extpos + 1).toLowerCase();
                    }
-                    if (ParserDispatcher.mediaExtContains(ext)) {
+                    if (Classification.mediaExtContains(ext)) {
                        // this is not a normal anchor, its a media link
-                        if (ParserDispatcher.imageExtContains(ext)) {
+                        if (Classification.imageExtContains(ext)) {
                            ContentScraper.addImage(collectedImages, new ImageEntry(url, entry.getValue(), -1, -1));
                        }
-                        else if (ParserDispatcher.audioExtContains(ext)) audiolinks.put(url, entry.getValue());
-                        else if (ParserDispatcher.videoExtContains(ext)) videolinks.put(url, entry.getValue());
-                        else if (ParserDispatcher.appsExtContains(ext)) applinks.put(url, entry.getValue());
+                        else if (Classification.audioExtContains(ext)) audiolinks.put(url, entry.getValue());
+                        else if (Classification.videoExtContains(ext)) videolinks.put(url, entry.getValue());
+                        else if (Classification.appsExtContains(ext)) applinks.put(url, entry.getValue());
                    } else {
                        hyperlinks.put(url, entry.getValue());
                    }
@ -396,21 +399,117 @@ dc_rights
        // we add artificial hyperlinks to the hyperlink set
        // that can be calculated from given hyperlinks and imagelinks
        
-        hyperlinks.putAll(ParserDispatcher.allReflinks(images.values()));
-        hyperlinks.putAll(ParserDispatcher.allReflinks(audiolinks.keySet()));
-        hyperlinks.putAll(ParserDispatcher.allReflinks(videolinks.keySet()));
-        hyperlinks.putAll(ParserDispatcher.allReflinks(applinks.keySet()));
+        hyperlinks.putAll(allReflinks(images.values()));
+        hyperlinks.putAll(allReflinks(audiolinks.keySet()));
+        hyperlinks.putAll(allReflinks(videolinks.keySet()));
+        hyperlinks.putAll(allReflinks(applinks.keySet()));
        /*
-        hyperlinks.putAll(plasmaParser.allSubpaths(hyperlinks.keySet()));
-        hyperlinks.putAll(plasmaParser.allSubpaths(images.values()));
-        hyperlinks.putAll(plasmaParser.allSubpaths(audiolinks.keySet()));
-        hyperlinks.putAll(plasmaParser.allSubpaths(videolinks.keySet()));
-        hyperlinks.putAll(plasmaParser.allSubpaths(applinks.keySet()));
+        hyperlinks.putAll(allSubpaths(hyperlinks.keySet()));
+        hyperlinks.putAll(allSubpaths(images.values()));
+        hyperlinks.putAll(allSubpaths(audiolinks.keySet()));
+        hyperlinks.putAll(allSubpaths(videolinks.keySet()));
+        hyperlinks.putAll(allSubpaths(applinks.keySet()));
         */        
        // don't do this again
        this.resorted = true;
    }
    
+    /**
+     * Computes all parent paths ("sub paths") of the given links.
+     * For every link the normal form of its url is shortened path segment by
+     * path segment, and each resulting prefix (ending with '/') is collected.
+     *
+     * @param links a collection whose elements are yacyURL, String or ImageEntry objects;
+     *        elements of any other type are skipped (and trip an assertion if assertions are enabled)
+     * @return a new map that assigns the value "sub" to every collected prefix that
+     *         can be parsed as url
+     */
+    public static Map<yacyURL, String> allSubpaths(final Collection<?> links) {
+        // links contains yacyURLs, Strings (urls) or ImageEntries
+        final HashSet<String> h = new HashSet<String>();
+        Iterator<?> i = links.iterator();
+        Object o;
+        yacyURL url;
+        String u;
+        int pos;
+        int l;
+        while (i.hasNext())
+            try {
+                o = i.next();
+                if (o instanceof yacyURL) url = (yacyURL) o;
+                else if (o instanceof String) url = new yacyURL((String) o, null);
+                else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
+                else {
+                    assert false;
+                    continue;
+                }
+                u = url.toNormalform(true, true);
+                // ignore a trailing slash so that the last segment is cut off first
+                if (u.endsWith("/"))
+                    u = u.substring(0, u.length() - 1);
+                pos = u.lastIndexOf('/');
+                // stop once the last '/' is within the first 9 characters
+                // (presumably the slashes of the "http://" / "https://" prefix - TODO confirm)
+                while (pos > 8) {
+                    l = u.length();
+                    // collect the prefix including its trailing '/' ...
+                    u = u.substring(0, pos + 1);
+                    h.add(u);
+                    // ... then drop that '/' to continue with the next higher level
+                    u = u.substring(0, pos);
+                    assert (u.length() < l) : "u = " + u;
+                    pos = u.lastIndexOf('/');
+                }
+            // links that cannot be parsed as url are silently skipped
+            } catch (final MalformedURLException e) { }
+        // now convert the strings to yacyURLs
+        i = h.iterator();
+        final HashMap<yacyURL, String> v = new HashMap<yacyURL, String>();
+        while (i.hasNext()) {
+            u = (String) i.next();
+            try {
+                url = new yacyURL(u, null);
+                v.put(url, "sub");
+            } catch (final MalformedURLException e) {
+                // a prefix that is not a valid url is left out of the result
+            }
+        }
+        return v;
+    }
+    
+    /**
+     * Finds urls that are embedded as a reference inside other urls, e.g. the
+     * target of a redirect link that carries a second "http://" or "/www." in it.
+     * <p>
+     * NOTE: this method modifies its argument. Every link in which an embedded
+     * reference is detected is removed from <code>links</code> through the iterator,
+     * so the collection must support removal. The removal happens before the embedded
+     * url is parsed; if parsing fails the link stays removed and nothing is added
+     * to the result.
+     * <p>
+     * NOTE(review): only "http://" and "/www." are searched for; embedded "https://"
+     * urls are not detected.
+     *
+     * @param links a collection whose elements are yacyURL, String or ImageEntry objects;
+     *        elements of any other type are skipped (and trip an assertion if assertions are enabled)
+     * @return a new map that assigns the value "ref" to every embedded url found
+     */
+    public static Map<yacyURL, String> allReflinks(final Collection<?> links) {
+        // links contains yacyURLs, Strings (with urls) or ImageEntries
+        // we find all links that are part of a reference inside a url
+        final HashMap<yacyURL, String> v = new HashMap<yacyURL, String>();
+        final Iterator<?> i = links.iterator();
+        Object o;
+        yacyURL url;
+        String u;
+        int pos;
+        loop: while (i.hasNext())
+            try {
+                o = i.next();
+                if (o instanceof yacyURL)
+                    url = (yacyURL) o;
+                else if (o instanceof String)
+                    url = new yacyURL((String) o, null);
+                else if (o instanceof ImageEntry)
+                    url = ((ImageEntry) o).url();
+                else {
+                    assert false;
+                    continue;
+                }
+                u = url.toNormalform(true, true);
+                // the search starts at index 7 to skip the url's own "http://" prefix
+                if ((pos = u.toLowerCase().indexOf("http://", 7)) > 0) {
+                    i.remove();
+                    u = u.substring(pos);
+                    // in case of several nested references keep only the innermost one
+                    while ((pos = u.toLowerCase().indexOf("http://", 7)) > 0)
+                        u = u.substring(pos);
+                    url = new yacyURL(u, null);
+                    if (!(v.containsKey(url)))
+                        v.put(url, "ref");
+                    continue loop;
+                }
+                if ((pos = u.toLowerCase().indexOf("/www.", 7)) > 0) {
+                    i.remove();
+                    // pos points at the '/', so "http:/" + "/www..." yields "http://www..."
+                    u = "http:/" + u.substring(pos);
+                    while ((pos = u.toLowerCase().indexOf("/www.", 7)) > 0)
+                        u = "http:/" + u.substring(pos);
+                    url = new yacyURL(u, null);
+                    if (!(v.containsKey(url)))
+                        v.put(url, "ref");
+                    continue loop;
+                }
+            } catch (final MalformedURLException e) {
+                // links or embedded references that are no valid urls are skipped
+            }
+        return v;
+    }
+    
    public void addSubDocument(final Document doc) throws IOException {
        this.sections.addAll(Arrays.asList(doc.getSectionTitles()));
        
@ -423,7 +522,7 @@ dc_rights
        this.description.append(doc.dc_description());
        
        if (!(this.text instanceof serverCachedFileOutputStream)) {
-            this.text = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE);
+            this.text = new serverCachedFileOutputStream(Idiom.MAX_KEEP_IN_MEMORY_SIZE);
            FileUtils.copy(getText(), (serverCachedFileOutputStream)this.text);
        }
        FileUtils.copy(doc.getText(), (serverCachedFileOutputStream)this.text);
--- a/source/de/anomic/document/Idiom.java
+++ b/source/de/anomic/document/Idiom.java
@ -0,0 +1,108 @@
+//Idiom.java 
+//------------------------
+//part of YaCy
+//(C) by Michael Peter Christen; mc@yacy.net
+//first published on http://www.anomic.de
+//Frankfurt, Germany, 2005
+//
+//this file was contributed by Martin Thelian
+//last major change: $LastChangedDate$ by $LastChangedBy$
+//Revision: $LastChangedRevision$
+//
+//This program is free software; you can redistribute it and/or modify
+//it under the terms of the GNU General Public License as published by
+//the Free Software Foundation; either version 2 of the License, or
+//(at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+//along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+package de.anomic.document;
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.Hashtable;
+
+import de.anomic.yacy.yacyURL;
+
+/**
+ * This interface defines the methods that need to be implemented
+ * by each content parser class.
+ * @author Martin Thelian
+ * @version $LastChangedRevision$ / $LastChangedDate$
+ */
+public interface Idiom {
+    
+
+    /**
+     * Size constant with the value 5 * 1024 * 1024 (5 MiB). Being declared in
+     * an interface, the field is implicitly <code>final</code>.
+     * NOTE(review): judging by the name this is the maximum amount of parsed
+     * content that is kept in memory - confirm against the classes using it.
+     */
+    public static long MAX_KEEP_IN_MEMORY_SIZE = 5 * 1024 * 1024;    
+    
+    /**
+     * Parsing a document available as byte array
+     * @param location the origin of the document 
+     * @param mimeType the mimetype of the document
+     * @param charset the supposed charset of the document or <code>null</code> if unknown
+     * @param source the content byte array
+     * @return a {@link Document} containing the extracted plain text of the document
+     * and some additional metadata.
+     *  
+     * @throws ParserException if the content could not be parsed properly 
+     * @throws InterruptedException presumably if the parsing thread was interrupted
+     * (not visible from this interface - confirm against the implementations)
+     */
+    public Document parse(yacyURL location, String mimeType, String charset, byte[] source)
+    throws ParserException, InterruptedException;
+    
+    /**
+     * Parsing a document stored in a {@link File}
+     * @param location the origin of the document 
+     * @param mimeType the mimetype of the document
+     * @param charset the supposed charset of the document or <code>null</code> if unknown 
+     * @param sourceFile the file containing the content of the document
+     * @return a {@link Document} containing the extracted plain text of the document
+     * and some additional metadata.
+     *  
+     * @throws ParserException if the content could not be parsed properly 
+     * @throws InterruptedException presumably if the parsing thread was interrupted
+     * (not visible from this interface - confirm against the implementations)
+     */    
+    public Document parse(yacyURL location, String mimeType, String charset, File sourceFile)
+    throws ParserException, InterruptedException;
+    
+    /**
+     * Parsing a document available as {@link InputStream}
+     * @param location the origin of the document 
+     * @param mimeType the mimetype of the document
+     * @param charset the supposed charset of the document or <code>null</code> if unknown 
+     * @param source the {@link InputStream} containing the document content
+     * @return a {@link Document} containing the extracted plain text of the document
+     * and some additional metadata.
+     *  
+     * @throws ParserException if the content could not be parsed properly 
+     * @throws InterruptedException presumably if the parsing thread was interrupted
+     * (not visible from this interface - confirm against the implementations)
+     */    
+    public Document parse(yacyURL location, String mimeType, String charset, InputStream source) 
+    throws ParserException, InterruptedException;
+            
+    /**
+     * Can be used to determine the MimeType(s) that are supported by the parser
+     * @return a {@link Hashtable} whose keys are the MimeTypes that are supported by 
+     * the parser.
+     * NOTE(review): the values appear to be comma-separated file extensions
+     * belonging to the mime type - confirm against the implementations.
+     */
+    public Hashtable<String, String> getSupportedMimeTypes();
+    
+    /**
+     * This function should be called before reusing the parser object.
+     */
+    public void reset();
+    
+    /**
+     * Announces the length of the content that is going to be parsed.
+     * @param length the content length; the callers visible in this change pass
+     * a byte count (array length or file length)
+     */
+    public void setContentLength(long length);
+    
+    /**
+     * Returns the name of the parser
+     * @return parser name
+     */
+    public String getName();
+}
+
+
--- a/source/de/anomic/document/Parser.java
+++ b/source/de/anomic/document/Parser.java
@ -1,108 +1,190 @@
-//Parser.java 
-//------------------------
-//part of YaCy
-//(C) by Michael Peter Christen; mc@yacy.net
-//first published on http://www.anomic.de
-//Frankfurt, Germany, 2005
-//
-//this file was contributed by Martin Thelian
-//last major change: $LastChangedDate$ by $LastChangedBy$
-//Revision: $LastChangedRevision$
-//
-//This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
-//the Free Software Foundation; either version 2 of the License, or
-//(at your option) any later version.
-//
-//This program is distributed in the hope that it will be useful,
-//but WITHOUT ANY WARRANTY; without even the implied warranty of
-//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//GNU General Public License for more details.
-//
-//You should have received a copy of the GNU General Public License
-//along with this program; if not, write to the Free Software
-//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package de.anomic.document;
-
-import java.io.File;
-import java.io.InputStream;
-import java.util.Hashtable;
-
-import de.anomic.yacy.yacyURL;
-
-/**
- * This interface defines a list of methods that needs to be implemented
- * by each content parser class.
- * @author Martin Thelian
- * @version $LastChangedRevision$ / $LastChangedDate$
- */
-public interface Parser {
-    
-
-    public static long MAX_KEEP_IN_MEMORY_SIZE = 5 * 1024 * 1024;    
-    
-    /**
-     * Parsing a document available as byte array
-     * @param location the origin of the document 
-     * @param mimeType the mimetype of the document
-     * @param charset the supposed charset of the document or <code>null</code> if unkown
-     * @param source the content byte array
-     * @return a {@link Document} containing the extracted plain text of the document
-     * and some additional metadata.
-     *  
-     * @throws ParserException if the content could not be parsed properly 
-     */
-    public Document parse(yacyURL location, String mimeType, String charset, byte[] source)
-    throws ParserException, InterruptedException;
-    
-    /**
-     * Parsing a document stored in a {@link File}
-     * @param location the origin of the document 
-     * @param mimeType the mimetype of the document
-     * @param charset the supposed charset of the document or <code>null</code> if unkown 
-     * @param sourceFile the file containing the content of the document
-     * @return a {@link Document} containing the extracted plain text of the document
-     * and some additional metadata.
-     *  
-     * @throws ParserException if the content could not be parsed properly 
-     */    
-    public Document parse(yacyURL location, String mimeType, String charset, File sourceFile)
-    throws ParserException, InterruptedException;
-    
-    /**
-     * Parsing a document available as {@link InputStream}
-     * @param location the origin of the document 
-     * @param mimeType the mimetype of the document
-     * @param charset the supposed charset of the document or <code>null</code> if unkown 
-     * @param source the {@link InputStream} containing the document content
-     * @return a {@link Document} containing the extracted plain text of the document
-     * and some additional metadata.
-     *  
-     * @throws ParserException if the content could not be parsed properly 
-     */    
-    public Document parse(yacyURL location, String mimeType, String charset, InputStream source) 
-    throws ParserException, InterruptedException;
-            
-    /**
-     * Can be used to determine the MimeType(s) that are supported by the parser
-     * @return a {@link Hashtable} containing a list of MimeTypes that are supported by 
-     * the parser
-     */
-    public Hashtable<String, String> getSupportedMimeTypes();
-    
-    /**
-     * This function should be called before reusing the parser object.
-     */
-    public void reset();
-    
-    public void setContentLength(long length);
-    
-    /**
-     * Returns the name of the parser
-     * @return parser name
-     */
-    public String getName();
-}
-
-
+// Parser.java
+// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 09.07.2009 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2009-03-20 16:44:59 +0100 (Fr, 20 Mrz 2009) $
+// $LastChangedRevision: 5736 $
+// $LastChangedBy: borg-0300 $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+package de.anomic.document;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.Iterator;
+
+import de.anomic.document.parser.bzipParser;
+import de.anomic.document.parser.docParser;
+import de.anomic.document.parser.gzipParser;
+import de.anomic.document.parser.htmlParser;
+import de.anomic.document.parser.mimeTypeParser;
+import de.anomic.document.parser.odtParser;
+import de.anomic.document.parser.pdfParser;
+import de.anomic.document.parser.pptParser;
+import de.anomic.document.parser.psParser;
+import de.anomic.document.parser.rpmParser;
+import de.anomic.document.parser.rssParser;
+import de.anomic.document.parser.rtfParser;
+import de.anomic.document.parser.sevenzipParser;
+import de.anomic.document.parser.swfParser;
+import de.anomic.document.parser.tarParser;
+import de.anomic.document.parser.vcfParser;
+import de.anomic.document.parser.vsdParser;
+import de.anomic.document.parser.xlsParser;
+import de.anomic.document.parser.zipParser;
+import de.anomic.yacy.yacyURL;
+import de.anomic.yacy.logging.Log;
+
+/**
+ * Static dispatcher that selects a parser ({@link Idiom}) by mime type and
+ * lets it turn a byte array, a file or a stream into a {@link Document}.
+ *
+ * All parsers listed in the static initializer are instantiated once and
+ * registered in {@link #availableParserList} under every mime type they
+ * report. The htmlParser is not registered there; it is created on demand
+ * for mime types accepted by Classification.HTMLParsableMimeTypesContains.
+ *
+ * NOTE(review): the registered parser instances are shared by all callers and
+ * receive per-document state through setContentLength(); whether concurrent
+ * calls of parseSource are safe depends on the parser implementations - confirm.
+ */
+public final class Parser {
+
+    private static final Log theLogger = new Log("PARSER");
+
+    /**
+     * Maps a mime type to the parser instance that handles it.
+     * Filled once by the static initializer below.
+     */
+    public static final HashMap<String, Idiom> availableParserList = new HashMap<String, Idiom>();
+    
+    static {
+        initParser(new bzipParser());
+        initParser(new docParser());
+        initParser(new gzipParser());
+        initParser(new mimeTypeParser());
+        initParser(new odtParser());
+        initParser(new pdfParser());
+        initParser(new pptParser());
+        initParser(new psParser());
+        initParser(new rpmParser());
+        initParser(new rssParser());
+        initParser(new rtfParser());
+        initParser(new sevenzipParser());
+        initParser(new swfParser());
+        initParser(new tarParser());
+        initParser(new vcfParser());
+        initParser(new vsdParser());
+        initParser(new xlsParser());
+        initParser(new zipParser());
+    }
+
+    /**
+     * Registers the given parser for each mime type it supports. A later
+     * registration for the same mime type replaces an earlier one.
+     * @param theParser the parser instance to register
+     */
+    private static void initParser(Idiom theParser) {
+        final Hashtable<String, String> supportedMimeTypes = theParser.getSupportedMimeTypes();
+        if (supportedMimeTypes == null) {
+            // a parser without mime types must not abort the static
+            // initialization of this class with a NullPointerException
+            Log.logInfo("PARSER", "Parser " + theParser.getName() + " reports no supported mimeTypes");
+            return;
+        }
+        for (final String mimeType : supportedMimeTypes.keySet()) {
+            availableParserList.put(mimeType, theParser);
+            Log.logInfo("PARSER", "Found parser for mimeType '" + mimeType + "': " + theParser.getName());
+        }
+    }
+
+    /**
+     * Parses a document that is available as byte array.
+     * @param location the origin of the document
+     * @param mimeType the mime type of the document
+     * @param charset the supposed charset of the document
+     * @param sourceArray the content of the document
+     * @return the parsed document, never <code>null</code>
+     * @throws InterruptedException passed through from the parser
+     * @throws ParserException if the array is <code>null</code> or empty, if no
+     * parser is available or if parsing failed; unexpected exceptions are
+     * wrapped into a ParserException
+     */
+    public static Document parseSource(final yacyURL location,
+            final String mimeType, final String charset,
+            final byte[] sourceArray) throws InterruptedException,
+            ParserException {
+        try {
+            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from byte-array");
+            if (sourceArray == null || sourceArray.length == 0) {
+                final String errorMsg = "No resource content available (1) " + (((sourceArray == null) ? "source == null" : "source.length() == 0") + ", url = " + location.toNormalform(true, false));
+                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, errorMsg);
+            }
+            // closing a ByteArrayInputStream has no effect, so no finally block is needed
+            return parseSource(location, mimeType, charset, sourceArray.length, new ByteArrayInputStream(sourceArray));
+        } catch (final InterruptedException e) {
+            throw e;
+        } catch (final ParserException e) {
+            throw e;
+        } catch (final Exception e) {
+            theLogger.logSevere("Unexpected exception in parseSource from byte-array: " + e.getMessage(), e);
+            throw new ParserException("Unexpected exception while parsing " + location, location, e);
+        }
+    }
+
+    /**
+     * Parses a document that is stored in a file.
+     * @param location the origin of the document
+     * @param mimeType the mime type of the document
+     * @param charset the supposed charset of the document
+     * @param sourceFile the file containing the content of the document
+     * @return the parsed document, never <code>null</code>
+     * @throws InterruptedException passed through from the parser
+     * @throws ParserException if the file does not exist, is not readable or
+     * is empty, if no parser is available or if parsing failed; unexpected
+     * exceptions are wrapped into a ParserException
+     */
+    public static Document parseSource(final yacyURL location,
+            final String mimeType, final String charset,
+            final File sourceFile) throws InterruptedException, ParserException {
+
+        BufferedInputStream sourceStream = null;
+        try {
+            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from file");
+            if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) {
+                final String errorMsg = sourceFile.exists() ? "Empty resource file." : "No resource content available (2).";
+                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, "document has no content");
+            }
+            sourceStream = new BufferedInputStream(new FileInputStream(sourceFile));
+            return parseSource(location, mimeType, charset, sourceFile.length(), sourceStream);
+        } catch (final InterruptedException e) {
+            throw e;
+        } catch (final ParserException e) {
+            throw e;
+        } catch (final Exception e) {
+            theLogger.logSevere("Unexpected exception in parseSource from File: " + e.getMessage(), e);
+            throw new ParserException("Unexpected exception while parsing " + location, location, e);
+        } finally {
+            if (sourceStream != null) {
+                try {
+                    sourceStream.close();
+                } catch (final Exception ignored) {
+                    // a failure while closing must not hide the parsing result
+                    // or the exception that is already on its way to the caller
+                    if (theLogger.isFine()) theLogger.logFine("Unable to close source stream of '" + location + "': " + ignored.getMessage());
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses a document that is available as stream. The stream is not closed
+     * by this method.
+     * @param location the origin of the document
+     * @param mimeType the mime type of the document; normalized before use
+     * @param charset the supposed charset of the document; patched by
+     * htmlParser.patchCharsetEncoding before it is handed to the parser
+     * @param contentLength the length of the content, passed to the selected
+     * parser via setContentLength
+     * @param sourceStream the stream containing the content of the document
+     * @return the parsed document, never <code>null</code>
+     * @throws InterruptedException passed through from the parser
+     * @throws ParserException if the content is not supported, no parser is
+     * available, the parser returned <code>null</code> or parsing failed;
+     * unexpected exceptions are wrapped into a ParserException
+     */
+    public static Document parseSource(final yacyURL location,
+            String mimeType, final String charset,
+            final long contentLength, final InputStream sourceStream)
+            throws InterruptedException, ParserException {
+        try {
+            if (theLogger.isFine()) theLogger.logFine("Parsing '" + location + "' from stream");
+            mimeType = Classification.normalizeMimeType(mimeType);
+            final String fileExt = Classification.getFileExt(location);
+            final String documentCharset = htmlParser.patchCharsetEncoding(charset);
+            if (!Classification.supportedContent(location, mimeType)) {
+                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (1)";
+                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, "wrong mime type or wrong extension");
+            }
+            // the message is guarded by isFine(), so it is logged on that level as well
+            if (theLogger.isFine()) theLogger.logFine("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
+            final Idiom parser = availableParserList.get(Classification.normalizeMimeType(mimeType));
+            final Document doc;
+            if (parser != null) {
+                // a registered parser takes precedence over the html parser
+                parser.setContentLength(contentLength);
+                doc = parser.parse(location, mimeType, documentCharset, sourceStream);
+            } else if (Classification.HTMLParsableMimeTypesContains(mimeType)) {
+                doc = new htmlParser().parse(location, mimeType, documentCharset, sourceStream);
+            } else {
+                final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
+                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location, "wrong mime type or wrong extension");
+            }
+            if (doc == null) {
+                final String errorMsg = "Unexpected error. Parser returned null.";
+                theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
+                throw new ParserException(errorMsg, location);
+            }
+            return doc;
+        } catch (final InterruptedException e) {
+            throw e;
+        } catch (final ParserException e) {
+            throw e;
+        } catch (final Exception e) {
+            final String errorMsg = "Unexpected exception. " + e.getMessage();
+            theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
+            throw new ParserException(errorMsg, location, e);
+        }
+    }
+
+}
--- a/source/de/anomic/document/ParserConfig.java
+++ b/source/de/anomic/document/ParserConfig.java
@ -1,174 +0,0 @@
-// plasmaParserConfig.java 
-// -------------------------------------
-// part of YACY
-// (C) by Michael Peter Christen; mc@yacy.net
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// This file ist contributed by Martin Thelian
-//
-// $LastChangedDate: 2006-02-20 23:57:42 +0100 (Mo, 20 Feb 2006) $
-// $LastChangedRevision: 1715 $
-// $LastChangedBy: theli $
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package de.anomic.document;
-
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.Set;
-
-import de.anomic.yacy.yacyURL;
-import de.anomic.yacy.logging.Log;
-
-public class ParserConfig {
-    
-    /**
-     * A list containing all enabled parsers and the mimeType that they can handle
-     * @see #loadEnabledParserList()
-     * @see #setEnabledParserList(Enumeration)
-     */
-    public final HashSet<String> enabledParserList;    
-    
-    /**
-     * A list of file extensions that are supported by all enabled parsers
-     */
-    private final HashSet<String> supportedFileExt;
-    
-    public ParserConfig() {
-        supportedFileExt = new HashSet<String>();
-        enabledParserList = new HashSet<String>();
-    }
-    
-    public boolean supportedContent(final yacyURL url, String mimeType) {
-        // TODO: we need some exceptions here to index URLs like this
-        //       http://www.musicabona.com/respighi/12668/cd/index.html.fr
-        mimeType = ParserDispatcher.normalizeMimeType(mimeType);
-        if (
-                mimeType.equals("text/html") ||
-                mimeType.equals("application/xhtml+xml") ||
-                mimeType.equals("text/plain")
-            ) {
-            return supportedMimeTypesContains(mimeType);
-        }
-        return supportedMimeTypesContains(mimeType) && supportedFileExt(url);
-    }        
-    
-    public boolean supportedMimeTypesContains(String mimeType) {
-        mimeType = ParserDispatcher.normalizeMimeType(mimeType);
-        
-        synchronized (ParserDispatcher.supportedHTMLMimeTypes) {
-            if (ParserDispatcher.supportedHTMLMimeTypes.contains(mimeType)) return true;
-        }        
-
-        synchronized (this.enabledParserList) { 
-            return this.enabledParserList.contains(mimeType);
-        }
-    }        
-    
-    private boolean supportedFileExt(final yacyURL url) {
-        if (url == null) throw new NullPointerException();
-        
-        // getting the file path
-        final String name = ParserDispatcher.getFileExt(url);
-        return supportedFileExtContains(name);
-    }
-    
-    public boolean supportedFileExtContains(String fileExt) {
-        if (fileExt == null) return false;        
-        fileExt = fileExt.trim().toLowerCase();
-
-        synchronized (ParserDispatcher.supportedHTMLFileExt) {
-            if (ParserDispatcher.supportedHTMLFileExt.contains(fileExt)) return true;
-        }        
-
-        synchronized(this.supportedFileExt) {
-            return this.supportedFileExt.contains(fileExt);
-        }
-    }        
-    
-    public void addParseableMimeTypes(final String enabledMimeTypes) {
-        HashSet<String> mimeTypes = null;
-        if ((enabledMimeTypes == null) || (enabledMimeTypes.length() == 0)) {
-            mimeTypes = new HashSet<String>();
-        } else {            
-            final String[] enabledMimeTypeList = enabledMimeTypes.split(",");
-            mimeTypes = new HashSet<String>(enabledMimeTypeList.length);
-            for (int i = 0; i < enabledMimeTypeList.length; i++) mimeTypes.add(enabledMimeTypeList[i].toLowerCase().trim());
-        }
-        setEnabledParserList(mimeTypes);
-    }
-    
-    public void enableAllParsers() {
-        final Set<String> availableMimeTypes = ParserDispatcher.availableParserList.keySet();
-        setEnabledParserList(availableMimeTypes);
-    }
-    
-    public String[] setEnabledParserList(final Set<String> mimeTypeSet) {
-        
-        final HashSet<String> newEnabledParsers = new HashSet<String>();
-        final HashSet<String> newSupportedFileExt = new HashSet<String>();
-        
-        if (mimeTypeSet != null) {
-            final Iterator<String> mimeTypes = mimeTypeSet.iterator();
-            while (mimeTypes.hasNext()) {
-                final String mimeType = mimeTypes.next();
-                Parser theParser = ParserDispatcher.availableParserList.get(mimeType);
-                if (theParser != null) {
-                    try {
-                        // getting a list of mimeTypes that the parser supports
-                        final Hashtable<String, String> parserSupportsMimeTypes = theParser.getSupportedMimeTypes();
-                        if (parserSupportsMimeTypes != null) {
-                            final Object supportedExtensions = parserSupportsMimeTypes.get(mimeType);
-                            if ((supportedExtensions != null) &&
-                                    (supportedExtensions instanceof String) &&
-                                    (((String)supportedExtensions).length() > 0)) {
-                                final String[] extArray = ((String)supportedExtensions).split(",");
-                                newSupportedFileExt.addAll(Arrays.asList(extArray));
-                            }
-                        }
-                        newEnabledParsers.add(mimeType);
-                        
-                    } catch (final Exception e) {
-                        Log.logSevere("PARSER", "error in setEnabledParserList", e);
-                    } finally {
-                        if (theParser != null)
-                            theParser = null; // destroy object
-                    }
-                }
-            }
-        }
-        
-        synchronized (this.enabledParserList) {
-            this.enabledParserList.addAll(newEnabledParsers);
-        }
-        
-        synchronized (this.supportedFileExt) {
-            this.supportedFileExt.addAll(newSupportedFileExt);
-        }
-
-        return newEnabledParsers.toArray(new String[newEnabledParsers.size()]);
-    }
-    
-    @SuppressWarnings("unchecked")
-    public HashSet<String> getEnabledParserList() {
-        synchronized (this.enabledParserList) {
-            return (HashSet<String>) this.enabledParserList.clone();
-        }        
-    }
-}
--- a/source/de/anomic/document/ParserDispatcher.java
+++ b/source/de/anomic/document/ParserDispatcher.java
@ -1,576 +0,0 @@
-
-package de.anomic.document;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-import de.anomic.document.parser.bzipParser;
-import de.anomic.document.parser.docParser;
-import de.anomic.document.parser.gzipParser;
-import de.anomic.document.parser.htmlParser;
-import de.anomic.document.parser.mimeTypeParser;
-import de.anomic.document.parser.odtParser;
-import de.anomic.document.parser.pdfParser;
-import de.anomic.document.parser.pptParser;
-import de.anomic.document.parser.psParser;
-import de.anomic.document.parser.rpmParser;
-import de.anomic.document.parser.rssParser;
-import de.anomic.document.parser.rtfParser;
-import de.anomic.document.parser.sevenzipParser;
-import de.anomic.document.parser.swfParser;
-import de.anomic.document.parser.tarParser;
-import de.anomic.document.parser.vcfParser;
-import de.anomic.document.parser.vsdParser;
-import de.anomic.document.parser.xlsParser;
-import de.anomic.document.parser.zipParser;
-import de.anomic.document.parser.html.ImageEntry;
-import de.anomic.yacy.yacyURL;
-import de.anomic.yacy.logging.Log;
-
-public final class ParserDispatcher {
- 
- public static final ParserConfig parserConfig = new ParserConfig();
- 
- /**
-  * A list containing all installed parsers and the mimeType that they support
-  * @see #loadAvailableParserList()
-  */
- public static final HashMap<String, Parser> availableParserList = new HashMap<String, Parser>();
- 
- /**
-  * A list of file extensions and mime types that are supported by the html-parser
-  */
- public static final HashSet<String> supportedHTMLFileExt = new HashSet<String>();
- public static final HashSet<String> supportedHTMLMimeTypes = new HashSet<String>();    
- 
- private static final Properties mimeTypeLookupByFileExt = new Properties();
- static {
-     // loading a list of extensions from file
-     BufferedInputStream bufferedIn = null;
-     try {            
-         mimeTypeLookupByFileExt.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("httpd.mime"))));
-     } catch (final IOException e) {
-         System.err.println("ERROR: httpd.mime not found in settings path");
-     } finally {
-         if (bufferedIn != null) try{bufferedIn.close();}catch(final Exception e){}
-     }    
- }
-
- /**
-  * A list of media extensions that should <b>not</b> be handled by the Parser
-  */
- private static final HashSet<String> mediaExtSet = new HashSet<String>();
- 
- /**
-  * A list of image, audio, video and application extensions
-  */
- private static final HashSet<String> imageExtSet = new HashSet<String>();
- private static final HashSet<String> audioExtSet = new HashSet<String>();
- private static final HashSet<String> videoExtSet = new HashSet<String>();
- private static final HashSet<String> appsExtSet = new HashSet<String>();
- 
- 
- /**
-  * Initializing the 
-  * @see #initMediaExt(String)
-  */
- static {
-     final String apps = "sit,hqx,img,dmg,exe,com,bat,sh,vbs,zip,jar";
-     final String audio = "mp2,mp3,ogg,aac,aif,aiff,wav";
-     final String video = "swf,avi,wmv,rm,mov,mpg,mpeg,ram,m4v";
-     final String image = "jpg,jpeg,jpe,gif,png,ico,bmp";
-     initMediaExt(extString2extList(
-             apps + "," +  // application container
-             "tar,gz,bz2,arj,zip,rar," + // archive formats
-             "ps,xls,ppt,asf," +         // text formats without support
-             audio + "," +               // audio formats
-             video + "," +               // video formats
-             image                       // image formats
-             ));
-     initImageExt(extString2extList(image));  // image formats
-     initAudioExt(extString2extList(audio));  // audio formats
-     initVideoExt(extString2extList(video));  // video formats
-     initAppsExt(extString2extList(apps));    // application formats
-     
-     /* ===================================================
-      * loading a list of available parsers
-      * =================================================== */        
-     loadAvailableParserList();      
- }
- 
- private static final Log theLogger = new Log("PARSER");
- 
- 
- /**
-  * This function is used to initialize the HTMLParsableMimeTypes List.
-  * This list contains a list of mimeTypes that can be parsed in realtime by
-  * the yacy html-Parser
-  * @param htmlParsableMimeTypes a list of mimetypes that can be parsed by the 
-  * yacy html parser
-  */
- public static void initHTMLParsableMimeTypes(final String htmlParsableMimeTypes) {
-     final LinkedList<String> mimeTypes = new LinkedList<String>();
-     if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
-         return;
-     }
-     final String[] realtimeParsableMimeTypeList = htmlParsableMimeTypes.split(",");        
-     for (int i = 0; i < realtimeParsableMimeTypeList.length; i++) {
-         mimeTypes.add(realtimeParsableMimeTypeList[i].toLowerCase().trim());
-     }
-     synchronized (supportedHTMLMimeTypes) {
-         supportedHTMLMimeTypes.addAll(mimeTypes);
-     }        
- }
- 
- public static List<String> extString2extList(final String extString) {
-     final LinkedList<String> extensions = new LinkedList<String>();
-     if ((extString == null) || (extString.length() == 0)) {
-         return extensions;
-     }
-     final String[] xs = extString.split(",");
-     for (int i = 0; i < xs.length; i++) extensions.add(xs[i].toLowerCase().trim());
-     return extensions;
- }
- 
- public static void initMediaExt(final List<String> mediaExtList) {
-     synchronized (mediaExtSet) {
-         mediaExtSet.addAll(mediaExtList);
-     }
- }
- 
- private static void initImageExt(final List<String> imageExtList) {
-     synchronized (imageExtSet) {
-         imageExtSet.addAll(imageExtList);
-     }
- }
- 
- private static void initAudioExt(final List<String> audioExtList) {
-     synchronized (audioExtSet) {
-         audioExtSet.addAll(audioExtList);
-     }
- }
- 
- private static void initVideoExt(final List<String> videoExtList) {
-     synchronized (videoExtSet) {
-         videoExtSet.addAll(videoExtList);
-     }
- }
- 
- private static void initAppsExt(final List<String> appsExtList) {
-     synchronized (appsExtSet) {
-         appsExtSet.addAll(appsExtList);
-     }
- }
- 
- public static void initSupportedHTMLFileExt(final List<String> supportedRealtimeFileExtList) {
-     synchronized (supportedHTMLFileExt) {
-         supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
-     }
- }
-     
- private static boolean HTMLParsableMimeTypesContains(String mimeType) {
-     mimeType = normalizeMimeType(mimeType);
-     synchronized (supportedHTMLMimeTypes) {
-         return supportedHTMLMimeTypes.contains(mimeType);
-     }
- }
-
- public static String getFileExt(final yacyURL url) {
-     // getting the file path
-     String name = url.getPath();
-     
-     // tetermining last position of / in the file path
-     int p = name.lastIndexOf('/');
-     if (p != -1) {
-         name = name.substring(p);
-     }
-         
-     // termining last position of . in file path
-     p = name.lastIndexOf('.');
-     if (p < 0) return ""; 
-     return name.substring(p + 1);        
- }
-
- public static boolean mediaExtContains(String mediaExt) {
-     if (mediaExt == null) return false;
-     mediaExt = mediaExt.trim().toLowerCase();
-     
-     synchronized (supportedHTMLFileExt) {
-         if (supportedHTMLFileExt.contains(mediaExt)) return false;
-     }        
-     
-     if (supportedFileExtContains(mediaExt)) return false;
-     
-     synchronized (mediaExtSet) {
-         return mediaExtSet.contains(mediaExt);
-     }
- }
-
- public static boolean imageExtContains(final String imageExt) {
-     if (imageExt == null) return false;
-     synchronized (imageExtSet) {
-         return imageExtSet.contains(imageExt.trim().toLowerCase());
-     }
- }
-
- public static boolean audioExtContains(final String audioExt) {
-     if (audioExt == null) return false;
-     synchronized (audioExtSet) {
-         return audioExtSet.contains(audioExt.trim().toLowerCase());
-     }
- }
-
- public static boolean videoExtContains(final String videoExt) {
-     if (videoExt == null) return false;
-     synchronized (videoExtSet) {
-         return videoExtSet.contains(videoExt.trim().toLowerCase());
-     }
- }
-
- public static boolean appsExtContains(final String appsExt) {
-     if (appsExt == null) return false;
-     synchronized (appsExtSet) {
-         return appsExtSet.contains(appsExt.trim().toLowerCase());
-     }
- }
-
- public static String normalizeMimeType(String mimeType) {
-     //if (mimeType == null) doMimeTypeAnalysis
-     if (mimeType == null) mimeType = "application/octet-stream";
-     mimeType = mimeType.trim().toLowerCase();
-     
-     final int pos = mimeType.indexOf(';');
-     return ((pos < 0) ? mimeType : mimeType.substring(0, pos));              
- }
- 
- public static String getMimeTypeByFileExt(final String fileExt) {        
-     return mimeTypeLookupByFileExt.getProperty(fileExt,"application/octet-stream");
- }
-
- public static HashMap<String, Parser> getAvailableParserList() {
-     return availableParserList;
- }    
- 
- private static void loadAvailableParserList() {
-     initParser(new bzipParser());
-     initParser(new docParser());
-     initParser(new gzipParser());
-     initParser(new mimeTypeParser());
-     initParser(new odtParser());
-     initParser(new pdfParser());
-     initParser(new pptParser());
-     initParser(new psParser());
-     initParser(new rpmParser());
-     initParser(new rssParser());
-     initParser(new rtfParser());
-     initParser(new sevenzipParser());
-     initParser(new swfParser());
-     initParser(new tarParser());
-     initParser(new vcfParser());
-     initParser(new vsdParser());
-     initParser(new xlsParser());
-     initParser(new zipParser());
- }
- 
- private static void initParser(Parser theParser) {
-  // loading the list of mime-types that are supported by this parser class
-     final Hashtable<String, String> supportedMimeTypes = theParser.getSupportedMimeTypes();
-     
-     final Iterator<String> mimeTypeIterator = supportedMimeTypes.keySet().iterator();
-     while (mimeTypeIterator.hasNext()) {
-         final String mimeType = mimeTypeIterator.next();
-         availableParserList.put(mimeType, theParser);
-         Log.logInfo("PARSER", "Found parser for mimeType '" + mimeType + "'." +
-                   "\n\tName:    " + theParser.getName());
-     }
- }
- 
- public static Document parseSource(final yacyURL location, final String mimeType, final String charset, final byte[] sourceArray) 
- throws InterruptedException, ParserException {
-     ByteArrayInputStream byteIn = null;
-     try {
-         if (theLogger.isFine())
-             theLogger.logFine("Parsing '" + location + "' from byte-array");
-         
-         // testing if the resource is not empty
-         if (sourceArray == null || sourceArray.length == 0) {
-             final String errorMsg = "No resource content available (1) " + (((sourceArray == null) ? "source == null" : "source.length() == 0") + ", url = " + location.toNormalform(true, false));
-             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-             throw new ParserException(errorMsg,location, errorMsg);
-         }              
-         
-         // creating an InputStream
-         byteIn = new ByteArrayInputStream(sourceArray);
-         
-         // parsing the temp file
-         return parseSource(location, mimeType, charset, sourceArray.length, byteIn);
-         
-     } catch (final Exception e) {
-         // Interrupted- and Parser-Exceptions should pass through
-         if (e instanceof InterruptedException) throw (InterruptedException) e;
-         if (e instanceof ParserException) throw (ParserException) e;
-         
-         // log unexpected error
-         theLogger.logSevere("Unexpected exception in parseSource from byte-array: " + e.getMessage(), e);
-         throw new ParserException("Unexpected exception while parsing " + location,location, e);
-     } finally {
-         if (byteIn != null) try { byteIn.close(); } catch (final Exception ex){/* ignore this */}
-     }
-     
- }
-
- public static Document parseSource(final yacyURL location, final String theMimeType, final String theDocumentCharset, final File sourceFile) throws InterruptedException, ParserException {
-     
-     BufferedInputStream sourceStream = null;
-     try {
-         if (theLogger.isFine())
-             theLogger.logFine("Parsing '" + location + "' from file");
-         
-         // testing if the resource is not empty
-         if (!(sourceFile.exists() && sourceFile.canRead() && sourceFile.length() > 0)) {
-             final String errorMsg = sourceFile.exists() ? "Empty resource file." : "No resource content available (2).";
-             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-             throw new ParserException(errorMsg,location, "document has no content");
-         }        
-         
-         // create a new InputStream
-         sourceStream = new BufferedInputStream(new FileInputStream(sourceFile));
-         
-         // parsing the data
-         return parseSource(location, theMimeType, theDocumentCharset, sourceFile.length(),  sourceStream);
-         
-     } catch (final Exception e) {
-         // Interrupted- and Parser-Exceptions should pass through
-         if (e instanceof InterruptedException) throw (InterruptedException) e;
-         if (e instanceof ParserException) throw (ParserException) e;
-
-         // log unexpected error
-         theLogger.logSevere("Unexpected exception in parseSource from File: " + e.getMessage(), e);
-         throw new ParserException("Unexpected exception while parsing " + location,location, e);
-     } finally {
-         if (sourceStream != null) try { sourceStream.close(); } catch (final Exception ex){/* ignore this */}
-     }
- }
- 
- /**
-  * To parse a resource from an {@link InputStream}
-  * @param location the URL of the resource
-  * @param theMimeType the resource mimetype (<code>null</code> if unknown)
-  * @param theDocumentCharset the charset of the resource (<code>null</code> if unknown)
-  * @param contentLength the content length of the resource (<code>-1</code> if unknown)
-  * @param sourceStream an {@link InputStream} containing the resource body 
-  * @return the parsed {@link ParserDocument document}
-  * @throws InterruptedException
-  * @throws ParserException
-  */
- public static Document parseSource(final yacyURL location, final String theMimeType, final String theDocumentCharset, final long contentLength, final InputStream sourceStream) throws InterruptedException, ParserException {        
-     Parser theParser = null;
-     String mimeType = null;
-     try {
-         if (theLogger.isFine())
-             theLogger.logFine("Parsing '" + location + "' from stream");            
-         
-         // getting the mimetype of the document
-         mimeType = normalizeMimeType(theMimeType);
-         
-         // getting the file extension of the document
-         final String fileExt = getFileExt(location);
-         
-         // getting the charset of the document
-         // TODO: do a charset detection here ....
-         final String documentCharset = htmlParser.patchCharsetEncoding(theDocumentCharset);
-         
-         // testing if parsing is supported for this resource
-         if (!supportedContent(location,mimeType)) {
-             final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (1)";
-             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-             throw new ParserException(errorMsg,location, "wrong mime type or wrong extension");
-         }
-         
-         if (theLogger.isFine())
-             theLogger.logInfo("Parsing " + location + " with mimeType '" + mimeType + 
-                                    "' and file extension '" + fileExt + "'.");                
-         
-         // getting the correct parser for the given mimeType
-         theParser = getParser(mimeType);
-         
-         // if a parser was found we use it ...
-         Document doc = null;
-         if (theParser != null) {
-             // set the content length of the resource
-             theParser.setContentLength(contentLength);
-             // parse the resource
-             doc = theParser.parse(location, mimeType,documentCharset,sourceStream);
-         } else if (HTMLParsableMimeTypesContains(mimeType)) {
-             doc = new htmlParser().parse(location, mimeType, documentCharset, sourceStream);
-         } else {
-             final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
-             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-             throw new ParserException(errorMsg,location, "wrong mime type or wrong extension");                
-         }
-         
-         // check result
-         if (doc == null) {
-             final String errorMsg = "Unexpected error. Parser returned null.";
-             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
-             throw new ParserException(errorMsg,location);                
-         }
-         return doc;
-         
-     } catch (final Exception e) {
-         // Interrupted- and Parser-Exceptions should pass through
-         if (e instanceof InterruptedException) throw (InterruptedException) e;
-         if (e instanceof ParserException) throw (ParserException) e;
-         
-         // log unexpected error
-         final String errorMsg = "Unexpected exception. " + e.getMessage();
-         theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
-         throw new ParserException(errorMsg,location,e);            
-         
-     } finally {
-         if (theParser != null) {
-             theParser = null; // delete object
-         }
-     }        
- }
- 
- 
-
- 
- /**
-  * This function is used to determine the parser class that should be used for a given
-  * mimetype ...
-  * @param mimeType MIME-Type of the resource
-  * @return the {@link Parser}-class that is supposed to parse the resource of
-  * the given MIME-Type
-  */
- private static Parser getParser(String mimeType) {
-
-     mimeType = normalizeMimeType(mimeType);        
-
-     // determining the proper parser class name for the mimeType
-     return availableParserList.get(mimeType);
- }
- 
- public static Map<yacyURL, String> allReflinks(final Collection<?> links) {
-     // links is either a Set of Strings (with urls) or htmlFilterImageEntries
-     // we find all links that are part of a reference inside a url
-     final HashMap<yacyURL, String> v = new HashMap<yacyURL, String>();
-     final Iterator<?> i = links.iterator();
-     Object o;
-     yacyURL url;
-     String u;
-     int pos;
-     loop: while (i.hasNext()) try {
-         o = i.next();
-         if (o instanceof yacyURL) url = (yacyURL) o;
-         else if (o instanceof String) url = new yacyURL((String) o, null);
-         else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
-         else {
-             assert false;
-             continue;
-         }
-         u = url.toNormalform(true, true);
-         if ((pos = u.toLowerCase().indexOf("http://",7)) > 0) {
-             i.remove();
-             u = u.substring(pos);
-             while ((pos = u.toLowerCase().indexOf("http://",7)) > 0) u = u.substring(pos);
-             url = new yacyURL(u, null);
-             if (!(v.containsKey(url))) v.put(url, "ref");
-             continue loop;
-         }
-         if ((pos = u.toLowerCase().indexOf("/www.",7)) > 0) {
-             i.remove();
-             u = "http:/" + u.substring(pos);
-             while ((pos = u.toLowerCase().indexOf("/www.",7)) > 0) u = "http:/" + u.substring(pos);
-             url = new yacyURL(u, null);
-             if (!(v.containsKey(url))) v.put(url, "ref");
-             continue loop;
-         }
-     } catch (final MalformedURLException e) {}
-     return v;
- }
- 
- static Map<yacyURL, String> allSubpaths(final Collection<?> links) {
-     // links is either a Set of Strings (urls) or a Set of htmlFilterImageEntries
-     final HashSet<String> h = new HashSet<String>();
-     Iterator<?> i = links.iterator();
-     Object o;
-     yacyURL url;
-     String u;
-     int pos;
-     int l;
-     while (i.hasNext()) try {
-         o = i.next();
-         if (o instanceof yacyURL) url = (yacyURL) o;
-         else if (o instanceof String) url = new yacyURL((String) o, null);
-         else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
-         else {
-             assert false;
-             continue;
-         }
-         u = url.toNormalform(true, true);
-         if (u.endsWith("/")) u = u.substring(0, u.length() - 1);
-         pos = u.lastIndexOf('/');
-         while (pos > 8) {
-             l = u.length();
-             u = u.substring(0, pos + 1);
-             h.add(u);
-             u = u.substring(0, pos);
-             assert (u.length() < l) : "u = " + u;
-             pos = u.lastIndexOf('/');
-         }
-     } catch (final MalformedURLException e) {}
-     // now convert the strings to yacyURLs
-     i = h.iterator();
-     final HashMap<yacyURL, String> v = new HashMap<yacyURL, String>();
-     while (i.hasNext()) {
-         u = (String) i.next();
-         try {
-             url = new yacyURL(u, null);
-             v.put(url, "sub");
-         } catch (final MalformedURLException e) {}
-     }
-     return v;
- }
- 
- public static boolean supportedContent(final yacyURL url, final String mimeType) {
-     if (url == null) throw new NullPointerException();
-     
-     if (parserConfig.supportedContent(url, mimeType)) return true;
-     
-     return false;
- }    
-
- public static void addParseableMimeTypes(final String configStr) {
-     parserConfig.addParseableMimeTypes(configStr);
- }
-
- public static String[] setEnabledParserList(final Set<String> mimeTypeSet) {
-     return parserConfig.setEnabledParserList(mimeTypeSet);        
- }
- 
- public static boolean supportedFileExtContains(final String fileExt) {
-     return parserConfig.supportedFileExtContains(fileExt);
- }
-
- public static boolean supportedMimeTypesContains(final String mimeType) {
-     return parserConfig.supportedMimeTypesContains(mimeType);
- }
-
-}
--- a/source/de/anomic/document/parser/bzipParser.java
+++ b/source/de/anomic/document/parser/bzipParser.java
@ -35,14 +35,14 @@ import java.util.Hashtable;
 import org.apache.tools.bzip2.CBZip2InputStream;

 import de.anomic.document.AbstractParser;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class bzipParser extends AbstractParser implements Parser {
+public class bzipParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -107,7 +107,7 @@ public class bzipParser extends AbstractParser implements Parser {
            checkInterruption();
            
            // creating a new parser class to parse the unzipped content
-            return ParserDispatcher.parseSource(location,null,null,tempFile);
+            return Parser.parseSource(location,null,null,tempFile);
        } catch (final Exception e) {  
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof ParserException) throw (ParserException) e;
--- a/source/de/anomic/document/parser/docParser.java
+++ b/source/de/anomic/document/parser/docParser.java
@ -34,12 +34,12 @@ import org.textmining.extraction.TextExtractor;
 import org.textmining.extraction.word.WordTextExtractorFactory;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;

-public class docParser extends AbstractParser implements Parser {
+public class docParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/gzipParser.java
+++ b/source/de/anomic/document/parser/gzipParser.java
@ -34,14 +34,14 @@ import java.util.Hashtable;
 import java.util.zip.GZIPInputStream;

 import de.anomic.document.AbstractParser;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class gzipParser extends AbstractParser implements Parser {
+public class gzipParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -97,7 +97,7 @@ public class gzipParser extends AbstractParser implements Parser {
            checkInterruption();
            
            // creating a new parser class to parse the unzipped content
-            return ParserDispatcher.parseSource(location,null,null,tempFile);
+            return Parser.parseSource(location,null,null,tempFile);
        } catch (final Exception e) {    
            if (e instanceof InterruptedException) throw (InterruptedException) e;
            if (e instanceof ParserException) throw (ParserException) e;
--- a/source/de/anomic/document/parser/htmlParser.java
+++ b/source/de/anomic/document/parser/htmlParser.java
@ -1,3 +1,29 @@
+// htmlParser.java
+// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 09.07.2009 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2009-03-20 16:44:59 +0100 (Fr, 20 Mrz 2009) $
+// $LastChangedRevision: 5736 $
+// $LastChangedBy: borg-0300 $
+//
+// LICENSE
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
 package de.anomic.document.parser;

 import java.io.IOException;
@ -9,7 +35,7 @@ import java.util.Hashtable;

 import de.anomic.document.AbstractParser;
 import de.anomic.document.Document;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ScraperInputStream;
@ -17,7 +43,7 @@ import de.anomic.document.parser.html.TransformerWriter;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class htmlParser extends AbstractParser implements Parser {
+public class htmlParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -187,6 +213,7 @@ public class htmlParser extends AbstractParser implements Parser {

        return encoding;
    }
+
    
    public Hashtable<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
--- a/source/de/anomic/document/parser/mimeTypeParser.java
+++ b/source/de/anomic/document/parser/mimeTypeParser.java
@ -41,14 +41,14 @@ import org.apache.log4j.Level;
 import org.apache.log4j.Logger;

 import de.anomic.document.AbstractParser;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class mimeTypeParser extends AbstractParser implements Parser {
+public class mimeTypeParser extends AbstractParser implements Idiom {
    
    /**
     * a list of mime types that are supported by this parser class
@ -140,7 +140,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
                checkInterruption();
                
                // parsing the content using the determined mimetype
-                return ParserDispatcher.parseSource(location,mimeType,charset,sourceFile);
+                return Parser.parseSource(location,mimeType,charset,sourceFile);
            }
            throw new ParserException("Unable to detect mimetype of resource (3).",location);
        } catch (final MagicMatchNotFoundException e) {
--- a/source/de/anomic/document/parser/odtParser.java
+++ b/source/de/anomic/document/parser/odtParser.java
@ -47,7 +47,7 @@ import com.catcode.odf.OpenDocumentTextInputStream;

 import de.anomic.crawler.HTTPLoader;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.http.httpClient;
@ -57,7 +57,7 @@ import de.anomic.kelondro.util.FileUtils;
 import de.anomic.server.serverCharBuffer;
 import de.anomic.yacy.yacyURL;

-public class odtParser extends AbstractParser implements Parser {
+public class odtParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -109,7 +109,7 @@ public class odtParser extends AbstractParser implements Parser {
                    final long contentSize = zipEntry.getSize();
                    
                    // creating a writer for output
-                    if ((contentSize == -1) || (contentSize > Parser.MAX_KEEP_IN_MEMORY_SIZE)) {
+                    if ((contentSize == -1) || (contentSize > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) {
                        writerFile = File.createTempFile("odtParser",".prt");
                        writer = new OutputStreamWriter(new FileOutputStream(writerFile),"UTF-8");
                    } else {
--- a/source/de/anomic/document/parser/pdfParser.java
+++ b/source/de/anomic/document/parser/pdfParser.java
@ -43,14 +43,14 @@ import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
 import org.pdfbox.util.PDFTextStripper;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.server.serverCharBuffer;
 import de.anomic.yacy.yacyURL;

-public class pdfParser extends AbstractParser implements Parser {
+public class pdfParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -122,7 +122,7 @@ public class pdfParser extends AbstractParser implements Parser {
            }            
            
            // creating a writer for output
-            if ((this.contentLength == -1) || (this.contentLength > Parser.MAX_KEEP_IN_MEMORY_SIZE)) {
+            if ((this.contentLength == -1) || (this.contentLength > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) {
                writerFile = File.createTempFile("pdfParser",".prt");
                writer = new OutputStreamWriter(new FileOutputStream(writerFile),"UTF-8");
            } else {
--- a/source/de/anomic/document/parser/pptParser.java
+++ b/source/de/anomic/document/parser/pptParser.java
@ -34,12 +34,12 @@ import java.util.Hashtable;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;

-public class pptParser extends AbstractParser implements Parser {
+public class pptParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/psParser.java
+++ b/source/de/anomic/document/parser/psParser.java
@ -37,13 +37,13 @@ import java.io.InputStreamReader;
 import java.util.Hashtable;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class psParser extends AbstractParser implements Parser {
+public class psParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/rpmParser.java
+++ b/source/de/anomic/document/parser/rpmParser.java
@ -38,7 +38,7 @@ import com.jguild.jrpm.io.datatype.DataTypeIf;

 import de.anomic.crawler.HTTPLoader;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.http.httpClient;
@ -51,7 +51,7 @@ import de.anomic.yacy.yacyURL;
 * @author theli
 *
 */
-public class rpmParser extends AbstractParser implements Parser {
+public class rpmParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/rssParser.java
+++ b/source/de/anomic/document/parser/rssParser.java
@ -39,7 +39,7 @@ import java.util.Map;

 import de.anomic.content.RSSMessage;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.document.parser.html.AbstractScraper;
@ -53,7 +53,7 @@ import de.anomic.kelondro.util.FileUtils;
 import de.anomic.server.serverCharBuffer;
 import de.anomic.yacy.yacyURL;

-public class rssParser extends AbstractParser implements Parser {
+public class rssParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/rtfParser.java
+++ b/source/de/anomic/document/parser/rtfParser.java
@ -34,12 +34,12 @@ import javax.swing.text.DefaultStyledDocument;
 import javax.swing.text.rtf.RTFEditorKit;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;

-public class rtfParser extends AbstractParser implements Parser {
+public class rtfParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/sevenzipParser.java
+++ b/source/de/anomic/document/parser/sevenzipParser.java
@ -41,8 +41,9 @@ import SevenZip.Archive.IInArchive;
 import SevenZip.Archive.SevenZipEntry;
 import SevenZip.Archive.SevenZip.Handler;
 import de.anomic.document.AbstractParser;
+import de.anomic.document.Classification;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.FileUtils;
@ -50,7 +51,7 @@ import de.anomic.server.serverCachedFileOutputStream;
 import de.anomic.yacy.yacyURL;
 import de.anomic.yacy.logging.Log;

-public class sevenzipParser extends AbstractParser implements Parser {
+public class sevenzipParser extends AbstractParser implements Idiom {
    
    /**
     * a list of mime types that are supported by this parser class
@ -99,14 +100,14 @@ public class sevenzipParser extends AbstractParser implements Parser {
    @Override
    public Document parse(final yacyURL location, final String mimeType, final String charset,
            final byte[] source) throws ParserException, InterruptedException {
-        return parse(location, mimeType, charset, new ByteArrayIInStream(source), Parser.MAX_KEEP_IN_MEMORY_SIZE - source.length);
+        return parse(location, mimeType, charset, new ByteArrayIInStream(source), Idiom.MAX_KEEP_IN_MEMORY_SIZE - source.length);
    }
    
    @Override
    public Document parse(final yacyURL location, final String mimeType, final String charset,
            final File sourceFile) throws ParserException, InterruptedException {
        try {
-            return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"), Parser.MAX_KEEP_IN_MEMORY_SIZE);
+            return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"), Idiom.MAX_KEEP_IN_MEMORY_SIZE);
        } catch (final IOException e) {
            throw new ParserException("error processing 7zip archive", location, e);
        }
@ -115,7 +116,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
    public Document parse(final yacyURL location, final String mimeType, final String charset,
            final InputStream source) throws ParserException, InterruptedException {
        try {
-            final serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(Parser.MAX_KEEP_IN_MEMORY_SIZE);
+            final serverCachedFileOutputStream cfos = new serverCachedFileOutputStream(Idiom.MAX_KEEP_IN_MEMORY_SIZE);
            FileUtils.copy(source, cfos);
            if (cfos.isFallback()) {
                return parse(location, mimeType, charset, cfos.getContentFile());
@ -189,11 +190,11 @@ public class sevenzipParser extends AbstractParser implements Parser {
                     // workaround for relative links in file, normally '#' shall be used behind the location, see
                     // below for reversion of the effects
                     final yacyURL url = yacyURL.newURL(doc.dc_source(), this.prefix + "/" + super.filePath);
-                     final String mime = ParserDispatcher.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
+                     final String mime = Classification.getMimeTypeByFileExt(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
                     if (this.cfos.isFallback()) {
-                         theDoc = ParserDispatcher.parseSource(url, mime, null, this.cfos.getContentFile());
+                         theDoc = Parser.parseSource(url, mime, null, this.cfos.getContentFile());
                     } else {
-                         theDoc = ParserDispatcher.parseSource(url, mime, null, this.cfos.getContentBAOS());
+                         theDoc = Parser.parseSource(url, mime, null, this.cfos.getContentBAOS());
                     }
                     
                     this.doc.addSubDocument(theDoc);
--- a/source/de/anomic/document/parser/swfParser.java
+++ b/source/de/anomic/document/parser/swfParser.java
@ -33,12 +33,12 @@ import java.util.Hashtable;

 import pt.tumba.parser.swf.SWF2HTML;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;

-public class swfParser extends AbstractParser implements Parser {
+public class swfParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/tarParser.java
+++ b/source/de/anomic/document/parser/tarParser.java
@ -43,8 +43,9 @@ import com.ice.tar.TarEntry;
 import com.ice.tar.TarInputStream;

 import de.anomic.document.AbstractParser;
+import de.anomic.document.Classification;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.document.parser.html.ContentScraper;
@ -53,7 +54,7 @@ import de.anomic.kelondro.util.ByteBuffer;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class tarParser extends AbstractParser implements Parser {
+public class tarParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -85,7 +86,7 @@ public class tarParser extends AbstractParser implements Parser {
        File outputFile = null;
        Document subDoc = null;        
        try {           
-            if ((this.contentLength == -1) || (this.contentLength > Parser.MAX_KEEP_IN_MEMORY_SIZE)) {
+            if ((this.contentLength == -1) || (this.contentLength > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) {
                outputFile = File.createTempFile("zipParser",".prt");
                docText = new BufferedOutputStream(new FileOutputStream(outputFile));
            } else {
@ -96,7 +97,7 @@ public class tarParser extends AbstractParser implements Parser {
             * If the mimeType was not reported correcly by the webserve we
             * have to decompress it first
             */
-            final String ext = ParserDispatcher.getFileExt(location).toLowerCase();
+            final String ext = Classification.getFileExt(location).toLowerCase();
            if (ext.equals("gz") || ext.equals("tgz")) {
                source = new GZIPInputStream(source);
            }
@ -129,7 +130,7 @@ public class tarParser extends AbstractParser implements Parser {
                final String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
                
                // trying to determine the mimeType per file extension   
-                final String entryMime = ParserDispatcher.getMimeTypeByFileExt(entryExt);
+                final String entryMime = Classification.getMimeTypeByFileExt(entryExt);
                
                // getting the entry content
                File subDocTempFile = null;
@ -144,7 +145,7 @@ public class tarParser extends AbstractParser implements Parser {
                    checkInterruption();
                    
                    // parsing the content                    
-                    subDoc = ParserDispatcher.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
+                    subDoc = Parser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
                } catch (final ParserException e) {
                    this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getMessage());
                } finally {
--- a/source/de/anomic/document/parser/vcfParser.java
+++ b/source/de/anomic/document/parser/vcfParser.java
@ -39,7 +39,7 @@ import java.util.LinkedList;

 import de.anomic.crawler.HTTPLoader;
 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.http.httpClient;
@ -53,7 +53,7 @@ import de.anomic.yacy.yacyURL;
 * @author theli
 *
 */
-public class vcfParser extends AbstractParser implements Parser {
+public class vcfParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/vsdParser.java
+++ b/source/de/anomic/document/parser/vsdParser.java
@ -31,14 +31,14 @@ import java.io.InputStream;
 import java.util.Hashtable;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hpsf.SummaryInformation;

-public class vsdParser extends AbstractParser implements Parser {
+public class vsdParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
--- a/source/de/anomic/document/parser/xlsParser.java
+++ b/source/de/anomic/document/parser/xlsParser.java
@ -40,12 +40,12 @@ import org.apache.poi.hssf.record.SSTRecord;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;

 import de.anomic.document.AbstractParser;
-import de.anomic.document.Parser;
+import de.anomic.document.Idiom;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.yacy.yacyURL;

-public class xlsParser extends AbstractParser implements Parser, HSSFListener {
+public class xlsParser extends AbstractParser implements Idiom, HSSFListener {

    //StringBuilder for parsed text
    private StringBuilder sbFoundStrings = null;
--- a/source/de/anomic/document/parser/zipParser.java
+++ b/source/de/anomic/document/parser/zipParser.java
@ -41,8 +41,9 @@ import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

 import de.anomic.document.AbstractParser;
+import de.anomic.document.Classification;
+import de.anomic.document.Idiom;
 import de.anomic.document.Parser;
-import de.anomic.document.ParserDispatcher;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.document.parser.html.ContentScraper;
@ -51,7 +52,7 @@ import de.anomic.kelondro.util.ByteBuffer;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;

-public class zipParser extends AbstractParser implements Parser {
+public class zipParser extends AbstractParser implements Idiom {

    /**
     * a list of mime types that are supported by this parser class
@ -85,7 +86,7 @@ public class zipParser extends AbstractParser implements Parser {
        File outputFile = null;
        Document subDoc = null;
        try {           
-            if ((this.contentLength == -1) || (this.contentLength > Parser.MAX_KEEP_IN_MEMORY_SIZE)) {
+            if ((this.contentLength == -1) || (this.contentLength > Idiom.MAX_KEEP_IN_MEMORY_SIZE)) {
                outputFile = File.createTempFile("zipParser",".prt");
                docText = new BufferedOutputStream(new FileOutputStream(outputFile));
            } else {
@ -117,7 +118,7 @@ public class zipParser extends AbstractParser implements Parser {
                final String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
                
                // trying to determine the mimeType per file extension   
-                final String entryMime = ParserDispatcher.getMimeTypeByFileExt(entryExt);      
+                final String entryMime = Classification.getMimeTypeByFileExt(entryExt);      
                
                // parsing the content
                File subDocTempFile = null;
@ -129,7 +130,7 @@ public class zipParser extends AbstractParser implements Parser {
                    FileUtils.copy(zippedContent,subDocTempFile,entry.getSize());                    
                    
                    // parsing the zip file entry
-                    subDoc = ParserDispatcher.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
+                    subDoc = Parser.parseSource(yacyURL.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
                } catch (final ParserException e) {
                    this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. " + e.getMessage());
                } finally {
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@ -80,7 +80,7 @@ import java.util.Properties;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.zip.GZIPOutputStream;

-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.document.parser.htmlParser;
 import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ScraperInputStream;
@ -231,7 +231,7 @@ public final class httpdFileHandler {
        }
        headers.put(httpHeader.SERVER, "AnomicHTTPD (www.anomic.de)");
        headers.put(httpHeader.DATE, DateFormatter.formatRFC1123(new Date()));
-        if(!(ParserDispatcher.mediaExtContains(ext))){
+        if(!(Classification.mediaExtContains(ext))){
            headers.put(httpHeader.PRAGMA, "no-cache");         
        }
        return headers;
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@ -73,7 +73,7 @@ import java.util.zip.GZIPOutputStream;

 import de.anomic.crawler.HTTPLoader;
 import de.anomic.data.Blacklist;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.document.parser.html.ContentTransformer;
 import de.anomic.document.parser.html.Transformer;
 import de.anomic.kelondro.util.DateFormatter;
@ -528,7 +528,7 @@ public final class httpdProxyHandler {

                    final String storeError = cacheEntry.shallStoreCacheForProxy();
                    final boolean storeHTCache = cacheEntry.profile().storeHTCache();
-                    final boolean isSupportedContent = ParserDispatcher.supportedContent(cacheEntry.url(), cacheEntry.getMimeType());
+                    final boolean isSupportedContent = Classification.supportedContent(cacheEntry.url(), cacheEntry.getMimeType());
                    if (
                            /*
                             * Now we store the response into the htcache directory if
--- a/source/de/anomic/kelondro/order/NaturalOrder.java
+++ b/source/de/anomic/kelondro/order/NaturalOrder.java
@ -26,7 +26,6 @@

 package de.anomic.kelondro.order;

-import java.io.IOException;
 import java.util.Comparator;
 import java.util.Iterator;

@ -238,7 +237,7 @@ public final class NaturalOrder extends AbstractOrder<byte[]> implements ByteOrd
        return sb.toString();
    }
    
-    public static Iterator<Long> LongIterator(Iterator<byte[]> b256Iterator) throws IOException {
+    public static Iterator<Long> LongIterator(Iterator<byte[]> b256Iterator) {
        return new LongIter(b256Iterator);
    }
    
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@ -41,7 +41,7 @@ import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;

-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
 import de.anomic.http.httpResponseHeader;
 import de.anomic.http.httpDocument;
 import de.anomic.kelondro.blob.ArrayStack;
@ -181,7 +181,7 @@ public final class plasmaHTCache {
    }

    public static boolean isText(final String mimeType) {
-        return ParserDispatcher.supportedMimeTypesContains(mimeType);
+        return Classification.supportedMimeTypesContains(mimeType);
    }

    public static boolean noIndexingURL(final yacyURL url) {
@ -200,7 +200,7 @@ public final class plasmaHTCache {

        //php
        
-        return ParserDispatcher.mediaExtContains(urlString);
+        return Classification.mediaExtContains(urlString);
    }


--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@ -144,7 +144,8 @@ import de.anomic.data.wiki.wikiBoard;
 import de.anomic.data.wiki.wikiCode;
 import de.anomic.data.wiki.wikiParser;
 import de.anomic.document.Condenser;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Word;
 import de.anomic.document.Document;
@ -517,13 +518,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
        
        // define an extension-blacklist
        log.logConfig("Parser: Initializing Extension Mappings for Media/Parser");
-        ParserDispatcher.initMediaExt(ParserDispatcher.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT,"")));
-        ParserDispatcher.initSupportedHTMLFileExt(ParserDispatcher.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT_PARSEABLE,"")));
+        Classification.initMediaExt(Classification.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT,"")));
+        Classification.initSupportedHTMLFileExt(Classification.extString2extList(getConfig(plasmaSwitchboardConstants.PARSER_MEDIA_EXT_PARSEABLE,"")));
        
        // define a realtime parsable mimetype list
        log.logConfig("Parser: Initializing Mime Types");
-        ParserDispatcher.initHTMLParsableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
-        ParserDispatcher.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES, null));
+        Classification.initHTMLParsableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
+        Classification.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES, null));
        
        // start a loader
        log.logConfig("Starting Crawl Loader");
@ -1097,7 +1098,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         * 
         * Testing if the content type is supported by the available parsers
         * ========================================================================= */
-        final boolean isSupportedContent = ParserDispatcher.supportedContent(entry.url(),entry.getMimeType());
+        final boolean isSupportedContent = Classification.supportedContent(entry.url(),entry.getMimeType());
        if (log.isFinest()) log.logFinest("STORE "+ entry.url() +" content of type "+ entry.getMimeType() +" is supported: "+ isSupportedContent);
        
        /* =========================================================================
@ -1689,7 +1690,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.

        try {
            // parse the document
-            document = ParserDispatcher.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), plasmaHTCache.getResourceContent(entry.url()));
+            document = Parser.parseSource(entry.url(), entry.getMimeType(), entry.getCharacterEncoding(), plasmaHTCache.getResourceContent(entry.url()));
            assert(document != null) : "Unexpected error. Parser returned null.";
        } catch (final ParserException e) {
            this.log.logWarning("Unable to parse the resource '" + entry.url() + "'. " + e.getMessage());
--- a/source/de/anomic/search/SnippetCache.java
+++ b/source/de/anomic/search/SnippetCache.java
@ -39,7 +39,8 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import de.anomic.document.Condenser;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Word;
 import de.anomic.document.Document;
@ -871,25 +872,25 @@ public class SnippetCache {
                if (    // if no extension is available
                        (p < 0) ||
                        // or the extension is supported by one of the parsers
-                        ((p >= 0) && (ParserDispatcher.supportedFileExtContains(filename.substring(p + 1))))
+                        ((p >= 0) && (Classification.supportedFileExtContains(filename.substring(p + 1))))
                ) {
                    String supposedMime = "text/html";

                    // if the mimeType Parser is installed we can set the mimeType to null to force
                    // a mimetype detection
-                    if (ParserDispatcher.supportedMimeTypesContains("application/octet-stream")) {
+                    if (Classification.supportedMimeTypesContains("application/octet-stream")) {
                        supposedMime = null;
                    } else if (p != -1){
                        // otherwise we try to determine the mimeType per file Extension
-                        supposedMime = ParserDispatcher.getMimeTypeByFileExt(filename.substring(p + 1));
+                        supposedMime = Classification.getMimeTypeByFileExt(filename.substring(p + 1));
                    }

-                    return ParserDispatcher.parseSource(url, supposedMime, null, contentLength, resourceStream);
+                    return Parser.parseSource(url, supposedMime, null, contentLength, resourceStream);
                }
                return null;
            }            
-            if (ParserDispatcher.supportedMimeTypesContains(responseHeader.mime())) {
-                return ParserDispatcher.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
+            if (Classification.supportedMimeTypesContains(responseHeader.mime())) {
+                return Parser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
            }
            return null;
        } catch (final InterruptedException e) {
--- a/source/de/anomic/tools/mediawikiIndex.java
+++ b/source/de/anomic/tools/mediawikiIndex.java
@ -58,7 +58,8 @@ import java.util.concurrent.TimeoutException;

 import de.anomic.data.wiki.wikiCode;
 import de.anomic.data.wiki.wikiParser;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.Classification;
+import de.anomic.document.Parser;
 import de.anomic.document.ParserException;
 import de.anomic.document.Document;
 import de.anomic.kelondro.util.ByteBuffer;
@ -102,8 +103,8 @@ public class mediawikiIndex extends Thread {
        this.count = 0;
        this.start = 0;
        // must be called before usage:
-        ParserDispatcher.initHTMLParsableMimeTypes("text/html");
-        ParserDispatcher.addParseableMimeTypes("text/html");
+        Classification.initHTMLParsableMimeTypes("text/html");
+        Classification.addParseableMimeTypes("text/html");
    }
    
    /**
@ -145,8 +146,8 @@ public class mediawikiIndex extends Thread {
            StringBuilder sb = new StringBuilder();
            boolean page = false, text = false;
            String title = null;
-            ParserDispatcher.initHTMLParsableMimeTypes("text/html");
-            ParserDispatcher.addParseableMimeTypes("text/html");
+            Classification.initHTMLParsableMimeTypes("text/html");
+            Classification.addParseableMimeTypes("text/html");
            wikiparserrecord poison = newRecord();
            int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
            BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
@ -487,7 +488,7 @@ public class mediawikiIndex extends Thread {
        public void genDocument() throws InterruptedException, ParserException {
            try {
 				url = new yacyURL(urlStub + title, null);
-				document = ParserDispatcher.parseSource(url, "text/html", "utf-8", html.getBytes("UTF-8"));
+				document = Parser.parseSource(url, "text/html", "utf-8", html.getBytes("UTF-8"));
 				// the wiki parser is not able to find the proper title in the source text, so it must be set here
 				document.setTitle(title);
 			} catch (UnsupportedEncodingException e) {
--- a/source/de/anomic/yacy/dht/FlatWordPartitionScheme.java
+++ b/source/de/anomic/yacy/dht/FlatWordPartitionScheme.java
@ -28,6 +28,12 @@ package de.anomic.yacy.dht;
 import de.anomic.kelondro.order.Base64Order;
 import de.anomic.yacy.yacySeed;

+/**
+ * A flat word partition scheme is a metric for words on the range of a distributed
+ * hash table. The DHT is represented by the integer range 0..Long.MAX_VALUE, and each
+ * word is assigned a number in that range. That number is the word's hash position,
+ * which is computed from the first 63 bits of the word's b64 hash string.
+ */
 public class FlatWordPartitionScheme implements PartitionScheme {

    public static final FlatWordPartitionScheme std = new FlatWordPartitionScheme();
--- a/source/de/anomic/ymage/ymageChart.java
+++ b/source/de/anomic/ymage/ymageChart.java
@ -147,12 +147,11 @@ public class ymageChart extends ymageMatrix {
   
    public static void main(final String[] args) {
        System.setProperty("java.awt.headless", "true");
-        final boolean invers = false;
-        final String bg = (invers) ? "000000" : "FFFFFF";
-        final String fg = (invers) ? "FFFFFF" : "000000";
-        final String scale = (invers) ? "333333" : "CCCCCC";
-        final String green = (invers) ? "008800" : "008800";
-        final String blue = (invers) ? "0000FF" : "0000FF";
+        final String bg = "FFFFFF";
+        final String fg = "000000";
+        final String scale = "CCCCCC";
+        final String green = "008800";
+        final String blue = "0000FF";
        final ymageChart ip = new ymageChart(660, 240, bg, fg, fg, 30, 30, 20, 20, "PEER PERFORMANCE GRAPH: PAGES/MINUTE and USED MEMORY", "");
        ip.declareDimension(DIMENSION_BOTTOM, 60, 60, -600, fg, scale, "TIME/SECONDS");
        //ip.declareDimension(DIMENSION_TOP, 10, 40, "000000", null, "count");