From 8ca1f5d400c370f2c61d1abe8c406967fd75cb6f Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Thu, 9 Jul 2009 20:56:30 +0000
Subject: [PATCH] - some work to integrate the html parser the same way as the
 other parsers are integrated (not finished) - added migration of code of
 settings pages (hmm.. does not work correctly yet, sorry) - more refactoring
 - removed more unused code

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6187 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 defaults/yacy.init                            |  13 +-
 htroot/SettingsAck_p.java                     |  57 ++---
 htroot/Settings_Parser.inc                    |  22 +-
 htroot/Settings_p.java                        |  49 +---
 source/de/anomic/document/AbstractParser.java |  16 +-
 .../de/anomic/document/ParserDispatcher.java  | 220 +-----------------
 .../de/anomic/document/parser/bzipParser.java |  12 +-
 .../de/anomic/document/parser/docParser.java  |  10 +-
 .../de/anomic/document/parser/gzipParser.java |   8 +-
 .../document/parser/html/ContentScraper.java  |   4 +-
 .../de/anomic/document/parser/htmlParser.java | 195 ++++++++++++++++
 .../document/parser/mimeTypeParser.java       |  13 +-
 .../de/anomic/document/parser/odtParser.java  |   8 +-
 .../de/anomic/document/parser/pdfParser.java  |  14 +-
 .../de/anomic/document/parser/pptParser.java  |  15 +-
 .../de/anomic/document/parser/psParser.java   |  10 +-
 .../de/anomic/document/parser/rpmParser.java  |  10 +-
 .../de/anomic/document/parser/rssParser.java  |  10 +-
 .../de/anomic/document/parser/rtfParser.java  |  10 +-
 .../document/parser/sevenzipParser.java       |  10 +-
 .../de/anomic/document/parser/swfParser.java  |   8 +-
 .../de/anomic/document/parser/tarParser.java  |  10 +-
 .../de/anomic/document/parser/vcfParser.java  |  12 +-
 .../de/anomic/document/parser/vsdParser.java  |  11 +-
 .../de/anomic/document/parser/xlsParser.java  |  11 +-
 .../de/anomic/document/parser/zipParser.java  |   8 +-
 source/de/anomic/http/httpdFileHandler.java   |   3 +-
 .../de/anomic/plasma/plasmaSwitchboard.java   |   2 +-
 .../plasma/plasmaSwitchboardConstants.java    |   5 +-
 source/de/anomic/search/RankingProcess.java   |   2 -
 source/migration.java                         |   6 -
 31 files changed, 283 insertions(+), 501 deletions(-)
 create mode 100644 source/de/anomic/document/parser/htmlParser.java

diff --git a/defaults/yacy.init b/defaults/yacy.init
index a2ca90d95..6624a162e 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -249,16 +249,15 @@ minimumGlobalDelta = 500
 
 # the following mime-types are the whitelist for indexing
 #
-# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
-# parseableMime: specifies mime-types that can be indexed but not on the fly
-parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
+# parseableMimeTypes: specifies mime-types that can be indexed with any built-in parser
 parseableMimeTypes=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.CRAWLER=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.PROXY=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.ICAP=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.URLREDIRECTOR=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
+
+# parseableMimeTypes.IMAGE: specifies mime-types that refer to image type content
 parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp
 
+# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
+parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
+
 # media extension string
 # a comma-separated list of extensions that denote media file formats
 # this is important to recognize <a href> - tags as not-html reference
diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java
index d995e9e53..d35ba6609 100644
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@@ -33,7 +33,6 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@@ -458,54 +457,34 @@ public class SettingsAck_p {
          */
         if (post.containsKey("parserSettings")) {
             post.remove("parserSettings");
-            /*
-            final Set<String> parserModes = ParserDispatcher.getParserConfigList().keySet();
-            final HashMap<String, HashSet<String>> newConfigList = new HashMap<String, HashSet<String>>();     
-            Iterator<String> parserModeIter = parserModes.iterator();
-            while (parserModeIter.hasNext()) {
-                final String currParserMode = parserModeIter.next();
-                newConfigList.put(currParserMode, new HashSet<String>());
-            }
             
-            // looping through all received settings
-            int pos;
+            final HashSet<String> newConfig = new HashSet<String>();
+            
+            // loop through all received settings
             final Iterator<String> keyEnum = post.keySet().iterator();
-            while (keyEnum.hasNext()) {                
-                final String key = keyEnum.next();
-                if ((pos = key.indexOf(".")) != -1) {
-                    final String currParserMode = key.substring(0,pos).trim().toUpperCase();
-                    final String currMimeType = key.substring(pos+1).replaceAll("\n", "");
-                    if (parserModes.contains(currParserMode)) {
-                        HashSet<String> currEnabledMimeTypes;
-                        assert (newConfigList.containsKey(currParserMode)) : "Unexpected Error";
-                        currEnabledMimeTypes = newConfigList.get(currParserMode);
-                        currEnabledMimeTypes.add(currMimeType);
-                    }
-                }
+            while (keyEnum.hasNext()) {
+                String key = keyEnum.next();
+                if (key.startsWith("mimename")) newConfig.add(post.get(key));
             }
             
             int enabledMimesCount = 0;
             final StringBuilder currEnabledMimesTxt = new StringBuilder();
-            parserModeIter = newConfigList.keySet().iterator();
-            while (parserModeIter.hasNext()) {                
-                final String currParserMode = parserModeIter.next();
-                final String[] enabledMimes = ParserDispatcher.setEnabledParserList(newConfigList.get(currParserMode));
-                Arrays.sort(enabledMimes);
-                
-                currEnabledMimesTxt.setLength(0);
-                for (int i=0; i < enabledMimes.length; i++) {
-                    currEnabledMimesTxt.append(enabledMimes[i]).append(",");
-                    prop.put("info_parser_" + enabledMimesCount + "_parserMode",currParserMode);
-                    prop.put("info_parser_" + enabledMimesCount + "_enabledMime",enabledMimes[i]);
-                    enabledMimesCount++;
-                }
-                if (currEnabledMimesTxt.length() > 0) currEnabledMimesTxt.deleteCharAt(currEnabledMimesTxt.length()-1);  
-                env.setConfig("parseableMimeTypes." + currParserMode,currEnabledMimesTxt.toString());
+            final String[] enabledMimes = ParserDispatcher.setEnabledParserList(newConfig);
+            Arrays.sort(enabledMimes);
+            
+            currEnabledMimesTxt.setLength(0);
+            for (int i=0; i < enabledMimes.length; i++) {
+                currEnabledMimesTxt.append(enabledMimes[i]).append(",");
+                prop.put("info_parser_" + enabledMimesCount + "_enabledMime", newConfig.toString());
+                enabledMimesCount++;
             }
+            if (currEnabledMimesTxt.length() > 0) currEnabledMimesTxt.deleteCharAt(currEnabledMimesTxt.length()-1);  
+            env.setConfig("parseableMimeTypes", currEnabledMimesTxt.toString());
+            
             prop.put("info_parser",enabledMimesCount);
             prop.put("info", "18");
             return prop;
-          */
+          
         }
         
         // Crawler settings
diff --git a/htroot/Settings_Parser.inc b/htroot/Settings_Parser.inc
index 621a3af0c..6038a4be6 100644
--- a/htroot/Settings_Parser.inc
+++ b/htroot/Settings_Parser.inc
@@ -6,31 +6,27 @@
   <a href="http://www.iana.org/assignments/media-types/">http://www.iana.org/assignments/media-types/</a>
 </p>
 <table border="0" cellpadding="2" cellspacing="1">
-  <tr class="TableHeader" valign="bottom">#{parserMode}#
-    <td class="small" >#[name]#</td>#{/parserMode}#
+  <tr class="TableHeader" valign="bottom">
+    <td class="small" >enable/disable Parser</td>
     <td class="small" >Mime-Type</td>
-    <td class="small" >Parser&nbsp;Usage</td>
   </tr>#{parser}#
   <tr class="TableCellDark">
-    <td colspan="#[colspan]#">#[name]# V#[version]#</td>
-    <td>&nbsp;</td>
-    <td>#[usage]#</td>
+    <td colspan="#[colspan]#">#[name]#</td>
+    <td>&nbsp;</td>
   </tr>#{mime}#
-  <tr class="TableCellLight">#{parserMode}#
-    <td class="small" align="center"><input type="checkbox" name="#[optionName]#" #(status)#::checked="checked" #(/status)#/></td>#{/parserMode}#
+  <tr class="TableCellLight">
+    <td class="small" align="center"><input type="checkbox" name="mimename.#[mimetype]#" value="#[mimetype]#" #(status)#::checked="checked" #(/status)#/></td>
     <td class="small">#[mimetype]#</td>
-    <td class="small">&nbsp;</td>
   </tr>#{/mime}#
   #{/parser}#
-  <tr class="TableCellDark">#{parserMode}#
+  <tr class="TableCellDark">
     <td class="small" align="center">
       <input type="checkbox" name="#[name]#.allParserEnabled" onclick="javascript: ParserCheckboxes(this);" #(allParserEnabled)#::checked="checked" #(/allParserEnabled)#/>
-    </td>#{/parserMode}#
-	 <td>&nbsp;</td>    
+    </td>
     <td colspan="2" class="small">Enable all parsers</td>
   </tr>
   <tr class="TableCellDark">
-    <td colspan="#[parser.colspan]#" class="small" ><input type="submit" name="parserSettings" value="Submit" /> Changes take effect immediately</td>
+    <td colspan="2" class="small" ><input type="submit" name="parserSettings" value="Submit" /> Changes take effect immediately</td>
   </tr>
 </table>
 </fieldset>
diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java
index 2dbf77d95..4631c6aee 100644
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@@ -26,11 +26,10 @@
 
 import java.util.Enumeration;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 
+import de.anomic.document.Parser;
 import de.anomic.document.ParserDispatcher;
-import de.anomic.document.ParserConfig;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.plasma.plasmaSwitchboard;
@@ -217,47 +216,19 @@ public final class Settings_p {
         /*
          * Parser Configuration
          */
-        /*
-        final HashMap<String, plasmaParserConfig> configList = ParserDispatcher.getParserConfigList();        
-        final plasmaParserConfig[] configArray = configList.values().toArray(new plasmaParserConfig[configList.size()]);
-        
-        final HashSet<ParserInfo> parserInfos = new HashSet<ParserInfo>(ParserDispatcher.getAvailableParserList().values());
-        
-//        // fetching a list of all available mimetypes
-//        List availableParserKeys = Arrays.asList(availableParsers.entrySet().toArray(new ParserInfo[availableParsers.size()]));
-//        
-//        // sort it
-//        Collections.sort(availableParserKeys);
-        
-        // loop through the mimeTypes and add it to the properties
-        final boolean[] allParsersEnabled = new boolean[configList.size()];
-        for (int i=0; i<configArray.length; i++)
-        	allParsersEnabled[i] = true;
         int parserIdx = 0;
         
-        final Iterator<ParserInfo> availableParserIter = parserInfos.iterator();
+        final Iterator<Parser> availableParserIter = ParserDispatcher.availableParserList.values().iterator();
         while (availableParserIter.hasNext()) {
-            final ParserInfo parserInfo = availableParserIter.next();
-            prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
-            prop.putXML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
-            prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount);
-            prop.put("parser_" + parserIdx + "_colspan", configArray.length);
+            final Parser parserInfo = availableParserIter.next();
+            prop.put("parser_" + parserIdx + "_name", parserInfo.getName());
             
             int mimeIdx = 0;
-            final Enumeration<String> mimeTypeIter = parserInfo.supportedMimeTypes.keys();
+            final Enumeration<String> mimeTypeIter = parserInfo.getSupportedMimeTypes().keys();
             while (mimeTypeIter.hasMoreElements()) {
                 final String mimeType = mimeTypeIter.nextElement();
-                
                 prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
-                //prop.put("parser_" + parserIdx + "_name", parserName);
-                //prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
-                for (int i=0; i<configArray.length; i++) {
-                    final HashSet<String> enabledParsers =  configArray[i].getEnabledParserList();
-                    prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode_" + i + "_optionName", configArray[i].parserMode + "." + mimeType);
-                    prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode_" + i + "_status", enabledParsers.contains(mimeType) ? "1" : "0");
-                    allParsersEnabled[i] &= enabledParsers.contains(mimeType);
-                }
-                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode", configArray.length);
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (ParserDispatcher.supportedMimeTypesContains(mimeType)) ? 1 : 0);
                 mimeIdx++;
             }
             prop.put("parser_" + parserIdx + "_mime", mimeIdx);
@@ -265,14 +236,8 @@ public final class Settings_p {
             parserIdx++;
         }
         
-        for (int i=0; i<configArray.length; i++) {
-            prop.put("parserMode_" + i + "_name",configArray[i].parserMode);
-            prop.put("parserMode_" + i + "_allParserEnabled",allParsersEnabled[i] ? "1" : "0");
-        }
-        prop.put("parserMode",configArray.length);
         prop.put("parser", parserIdx);
-        prop.put("parser.colspan", configArray.length+2);
-        */
+        
         // Crawler settings
         prop.putHTML("crawler.clientTimeout",sb.getConfig("crawler.clientTimeout", "10000"));
         prop.putHTML("crawler.http.maxFileSize",sb.getConfig("crawler.http.maxFileSize", "-1"));
diff --git a/source/de/anomic/document/AbstractParser.java b/source/de/anomic/document/AbstractParser.java
index fd90f1b00..d9948f8a7 100644
--- a/source/de/anomic/document/AbstractParser.java
+++ b/source/de/anomic/document/AbstractParser.java
@@ -45,11 +45,6 @@ import de.anomic.yacy.logging.Log;
  */
 public abstract class AbstractParser implements Parser {
     
-    /**
-     * a list of library names that are needed by this parser
-     */
-    protected String[] libxDependencies = null;
-    
     /**
      * the logger class that should be used by the parser module for logging
      * purposes.
@@ -70,9 +65,8 @@ public abstract class AbstractParser implements Parser {
     /**
      * The Constructor of this class.
      */
-	public AbstractParser(final String[] libxDependencies) {
+	public AbstractParser() {
 		super();
-        this.libxDependencies = libxDependencies;
 	}
     
     /**
@@ -229,14 +223,6 @@ public abstract class AbstractParser implements Parser {
      * @see de.anomic.document.Parser#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream)
      */
     public abstract Document parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
-
-    /**
-     * @return Returns a list of library names that are needed by this parser
-     * @see de.anomic.document.Parser#getLibxDependences()
-     */
-    public String[] getLibxDependences() {
-        return this.libxDependencies;
-    }
     
     /**
      * Return the name of the parser
diff --git a/source/de/anomic/document/ParserDispatcher.java b/source/de/anomic/document/ParserDispatcher.java
index 64678afb2..70700a208 100644
--- a/source/de/anomic/document/ParserDispatcher.java
+++ b/source/de/anomic/document/ParserDispatcher.java
@@ -7,11 +7,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.UnsupportedCharsetException;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -26,6 +22,7 @@ import java.util.Set;
 import de.anomic.document.parser.bzipParser;
 import de.anomic.document.parser.docParser;
 import de.anomic.document.parser.gzipParser;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.document.parser.mimeTypeParser;
 import de.anomic.document.parser.odtParser;
 import de.anomic.document.parser.pdfParser;
@@ -41,17 +38,13 @@ import de.anomic.document.parser.vcfParser;
 import de.anomic.document.parser.vsdParser;
 import de.anomic.document.parser.xlsParser;
 import de.anomic.document.parser.zipParser;
-import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ImageEntry;
-import de.anomic.document.parser.html.ScraperInputStream;
-import de.anomic.document.parser.html.TransformerWriter;
-import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;
 import de.anomic.yacy.logging.Log;
 
 public final class ParserDispatcher {
  
- private static final ParserConfig parserConfig = new ParserConfig();
+ public static final ParserConfig parserConfig = new ParserConfig();
  
  /**
   * A list containing all installed parsers and the mimeType that they support
@@ -91,30 +84,6 @@ public final class ParserDispatcher {
  private static final HashSet<String> videoExtSet = new HashSet<String>();
  private static final HashSet<String> appsExtSet = new HashSet<String>();
  
- /**
-  * This {@link FilenameFilter} is used to find all classes based on there filenames 
-  * which seems to be additional content parsers.
-  * Currently the filenames of all content parser classes must end with <code>Parser.class</code> 
-  */
- /*
- private static final FilenameFilter parserFileNameFilter = new FilenameFilter() {
-     public boolean accept(File dir, String name) {
-         return name.endsWith("Parser.class");
-     }
- };
- */
- 
- /**
-  * This {@link FileFilter} is used to get all subpackages
-  * of the parser package.
-  */
- /*
- private static final FileFilter parserDirectoryFilter = new FileFilter() {
-     public boolean accept(File file) {
-         return file.isDirectory();
-     }
- };
- */    
  
  /**
   * Initializing the 
@@ -146,9 +115,6 @@ public final class ParserDispatcher {
  
  private static final Log theLogger = new Log("PARSER");
  
- public Log getLogger() {
-     return theLogger;
- }
  
  /**
   * This function is used to initialize the HTMLParsableMimeTypes List.
@@ -187,59 +153,42 @@ public final class ParserDispatcher {
      }
  }
  
- public static void initImageExt(final List<String> imageExtList) {
+ private static void initImageExt(final List<String> imageExtList) {
      synchronized (imageExtSet) {
          imageExtSet.addAll(imageExtList);
      }
  }
  
- public static void initAudioExt(final List<String> audioExtList) {
+ private static void initAudioExt(final List<String> audioExtList) {
      synchronized (audioExtSet) {
          audioExtSet.addAll(audioExtList);
      }
  }
  
- public static void initVideoExt(final List<String> videoExtList) {
+ private static void initVideoExt(final List<String> videoExtList) {
      synchronized (videoExtSet) {
          videoExtSet.addAll(videoExtList);
      }
  }
  
- public static void initAppsExt(final List<String> appsExtList) {
+ private static void initAppsExt(final List<String> appsExtList) {
      synchronized (appsExtSet) {
          appsExtSet.addAll(appsExtList);
      }
  }
  
- public static String getMediaExtList() {
-     synchronized (mediaExtSet) {
-         return mediaExtSet.toString();
-     }        
- }
- 
  public static void initSupportedHTMLFileExt(final List<String> supportedRealtimeFileExtList) {
      synchronized (supportedHTMLFileExt) {
          supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
      }
  }
      
- public static boolean HTMLParsableMimeTypesContains(String mimeType) {
+ private static boolean HTMLParsableMimeTypesContains(String mimeType) {
      mimeType = normalizeMimeType(mimeType);
      synchronized (supportedHTMLMimeTypes) {
          return supportedHTMLMimeTypes.contains(mimeType);
      }
  }
- 
- public static boolean supportedHTMLContent(final yacyURL url, final String mimeType) {
-     return HTMLParsableMimeTypesContains(mimeType) && supportedHTMLFileExtContains(url);
- }    
- 
- public static boolean supportedHTMLFileExtContains(final yacyURL url) {
-     final String fileExt = getFileExt(url);
-     synchronized (supportedHTMLFileExt) {
-         return supportedHTMLFileExt.contains(fileExt);
-     }   
- }
 
  public static String getFileExt(final yacyURL url) {
      // getting the file path
@@ -300,81 +249,6 @@ public final class ParserDispatcher {
      }
  }
 
- /**
-  * some html authors use wrong encoding names, either because they don't know exactly what they
-  * are doing or they produce a type. Many times, the upper/downcase scheme of the name is fuzzy
-  * This method patches wrong encoding names. The correct names are taken from
-  * http://www.iana.org/assignments/character-sets
-  * @param encoding
-  * @return patched encoding name
-  */
- public static String patchCharsetEncoding(String encoding) {
-     
-     // return the system default encoding
-     if ((encoding == null) || (encoding.length() < 3)) return Charset.defaultCharset().name();
-     
-     // trim encoding string
-     encoding = encoding.trim();
-
-     // fix upper/lowercase
-     encoding = encoding.toUpperCase();
-     if (encoding.startsWith("SHIFT")) return "Shift_JIS";
-     if (encoding.startsWith("BIG")) return "Big5";
-     // all other names but such with "windows" use uppercase
-     if (encoding.startsWith("WINDOWS")) encoding = "windows" + encoding.substring(7);
-     if (encoding.startsWith("MACINTOSH")) encoding = "MacRoman";
-     
-     // fix wrong fill characters
-     encoding = encoding.replaceAll("_", "-");
-
-     if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312";
-     if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8";
-     if (encoding.startsWith("US")) return "US-ASCII";
-     if (encoding.startsWith("KOI")) return "KOI8-R";
-     
-     // patch missing '-'
-     if (encoding.startsWith("windows") && encoding.length() > 7) {
-         final char c = encoding.charAt(7);
-         if ((c >= '0') && (c <= '9')) {
-             encoding = "windows-" + encoding.substring(7);
-         }
-     }
-     
-     if (encoding.startsWith("ISO")) {
-         // patch typos
-         if (encoding.length() > 3) {
-             final char c = encoding.charAt(3);
-             if ((c >= '0') && (c <= '9')) {
-                 encoding = "ISO-" + encoding.substring(3);
-             }
-         }
-         if (encoding.length() > 8) {
-             final char c = encoding.charAt(8);
-             if ((c >= '0') && (c <= '9')) {
-                 encoding = encoding.substring(0, 8) + "-" + encoding.substring(8);           
-             } 
-         }
-     }
-     
-     // patch wrong name
-     if (encoding.startsWith("ISO-8559")) {
-         // popular typo
-         encoding = "ISO-8859" + encoding.substring(8);
-     }
-
-     // converting cp\d{4} -> windows-\d{4}
-     if (encoding.matches("CP([_-])?125[0-8]")) {
-         final char c = encoding.charAt(2);
-         if ((c >= '0') && (c <= '9')) {
-             encoding = "windows-" + encoding.substring(2);
-         } else {
-             encoding = "windows" + encoding.substring(2);
-         }
-     }
-
-     return encoding;
- }
- 
  public static String normalizeMimeType(String mimeType) {
      //if (mimeType == null) doMimeTypeAnalysis
      if (mimeType == null) mimeType = "application/octet-stream";
@@ -519,7 +393,7 @@ public final class ParserDispatcher {
          
          // getting the charset of the document
          // TODO: do a charset detection here ....
-         final String documentCharset = patchCharsetEncoding(theDocumentCharset);
+         final String documentCharset = htmlParser.patchCharsetEncoding(theDocumentCharset);
          
          // testing if parsing is supported for this resource
          if (!supportedContent(location,mimeType)) {
@@ -543,7 +417,7 @@ public final class ParserDispatcher {
              // parse the resource
              doc = theParser.parse(location, mimeType,documentCharset,sourceStream);
          } else if (HTMLParsableMimeTypesContains(mimeType)) {
-             doc = parseHtml(location, mimeType, documentCharset, sourceStream);
+             doc = new htmlParser().parse(location, mimeType, documentCharset, sourceStream);
          } else {
              final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
              theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
@@ -558,17 +432,6 @@ public final class ParserDispatcher {
          }
          return doc;
          
-     } catch (final UnsupportedEncodingException e) {
-         final String errorMsg = "unsupported charset encoding: " + e.getMessage();
-         theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
-         throw new ParserException(errorMsg,location, errorMsg);                 
-     } catch (final IOException e) {
-         // IOExceptions may occur during html parsing when a server closes the connection during reading.
-         // This may happen here, because the html parser is a streaming parser
-         // that produces surrogates while the connection is active
-         final String errorMsg = "IOException - server may have closed the connection. " + e.getMessage();
-         theLogger.logWarning("Unable to parse '" + location + "'. " + errorMsg);
-         throw new ParserException(errorMsg, location, errorMsg);
      } catch (final Exception e) {
          // Interrupted- and Parser-Exceptions should pass through
          if (e instanceof InterruptedException) throw (InterruptedException) e;
@@ -586,71 +449,8 @@ public final class ParserDispatcher {
      }        
  }
  
- private static Document parseHtml(
-         final yacyURL location, 
-         final String mimeType, 
-         final String documentCharset, 
-         final InputStream sourceStream) throws IOException, ParserException {
-     
-     // make a scraper and transformer
-     final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
-     String charset = htmlFilter.detectCharset();
-     if (charset == null) {
-         charset = documentCharset;
-     } else {
-         charset = patchCharsetEncoding(charset);
-     }
-     
-     if (!documentCharset.equalsIgnoreCase(charset)) {
-         theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
-     }
-     
-     Charset c;
-     try {
-         c = Charset.forName(charset);
-     } catch (IllegalCharsetNameException e) {
-         c = Charset.defaultCharset();
-     } catch (UnsupportedCharsetException e) {
-         c = Charset.defaultCharset();
-     }
-     
-     // parsing the content
-     final ContentScraper scraper = new ContentScraper(location);        
-     final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false);
-     FileUtils.copy(htmlFilter, writer, c);
-     writer.close();
-     //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);            
-     //serverFileUtils.copy(sourceFile, hfos);
-     //hfos.close();
-     if (writer.binarySuspect()) {
-         final String errorMsg = "Binary data found in resource";
-         theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg);
-         throw new ParserException(errorMsg,location);    
-     }
-     return transformScraper(location, mimeType, documentCharset, scraper);
- }
  
- public static Document transformScraper(final yacyURL location, final String mimeType, final String charSet, final ContentScraper scraper) {
-     final String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
-     int p = 0;
-     for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
-     final Document ppd =  new Document(
-             location,
-             mimeType,
-             charSet,
-             scraper.getContentLanguages(),
-             scraper.getKeywords(),
-             scraper.getTitle(),
-             scraper.getAuthor(),
-             sections,
-             scraper.getDescription(),
-             scraper.getText(),
-             scraper.getAnchors(),
-             scraper.getImages());
-     //scraper.close();            
-     ppd.setFavicon(scraper.getFavicon());
-     return ppd;
- }
+
  
  /**
   * This function is used to determine the parser class that should be used for a given
diff --git a/source/de/anomic/document/parser/bzipParser.java b/source/de/anomic/document/parser/bzipParser.java
index baefa1316..33de51192 100644
--- a/source/de/anomic/document/parser/bzipParser.java
+++ b/source/de/anomic/document/parser/bzipParser.java
@@ -53,21 +53,15 @@ public class bzipParser extends AbstractParser implements Parser {
     static { 
         SUPPORTED_MIME_TYPES.put("application/x-bzip2",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/bzip2", fileExtensions);
-        SUPPORTED_MIME_TYPES.put("application/x-bz2", fileExtensions);
+        SUPPORTED_MIME_TYPES.put("application/x-bz2", fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/octet-stream",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/x-bzip",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/x-compressed",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/x-stuffit",fileExtensions);
-    }     
-    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};
+    }
     
     public bzipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Bzip 2 UNIX Compressed File Parser";
     }
     
diff --git a/source/de/anomic/document/parser/docParser.java b/source/de/anomic/document/parser/docParser.java
index fe7aaf532..c76e96d65 100644
--- a/source/de/anomic/document/parser/docParser.java
+++ b/source/de/anomic/document/parser/docParser.java
@@ -58,16 +58,8 @@ public class docParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-msword","doc");
     }
     
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "tm-extractors-1.0.jar"
-    };    
-    
 	public docParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
         this.parserName = "Word Document Parser";
 	}
 
diff --git a/source/de/anomic/document/parser/gzipParser.java b/source/de/anomic/document/parser/gzipParser.java
index 195dc95df..730ed7690 100644
--- a/source/de/anomic/document/parser/gzipParser.java
+++ b/source/de/anomic/document/parser/gzipParser.java
@@ -62,14 +62,8 @@ public class gzipParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-tar",fileExtensions);
     }     
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
-    
     public gzipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "GNU Zip Compressed Archive Parser";
     }
     
diff --git a/source/de/anomic/document/parser/html/ContentScraper.java b/source/de/anomic/document/parser/html/ContentScraper.java
index ac78fe894..45644f9e0 100644
--- a/source/de/anomic/document/parser/html/ContentScraper.java
+++ b/source/de/anomic/document/parser/html/ContentScraper.java
@@ -45,7 +45,7 @@ import java.util.Properties;
 import javax.swing.event.EventListenerList;
 
 import de.anomic.crawler.HTTPLoader;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.http.httpClient;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
@@ -501,7 +501,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         
         // scrape document to look up charset
         final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page),"UTF-8",new yacyURL("http://localhost", null),null,false);
-        final String charset = ParserDispatcher.patchCharsetEncoding(htmlFilter.detectCharset());
+        final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
         
         // scrape content
         final ContentScraper scraper = new ContentScraper(new yacyURL("http://localhost", null));
diff --git a/source/de/anomic/document/parser/htmlParser.java b/source/de/anomic/document/parser/htmlParser.java
new file mode 100644
index 000000000..f441b8e21
--- /dev/null
+++ b/source/de/anomic/document/parser/htmlParser.java
@@ -0,0 +1,195 @@
+package de.anomic.document.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Hashtable;
+
+import de.anomic.document.AbstractParser;
+import de.anomic.document.Document;
+import de.anomic.document.Parser;
+import de.anomic.document.ParserException;
+import de.anomic.document.parser.html.ContentScraper;
+import de.anomic.document.parser.html.ScraperInputStream;
+import de.anomic.document.parser.html.TransformerWriter;
+import de.anomic.kelondro.util.FileUtils;
+import de.anomic.yacy.yacyURL;
+
+public class htmlParser extends AbstractParser implements Parser {
+
+    /**
+     * a list of mime types that are supported by this parser class
+     * @see #getSupportedMimeTypes()
+     */
+    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();  
+    static { 
+        SUPPORTED_MIME_TYPES.put("application/xhtml+xml","htm,html,xhtml,php,asp");
+        SUPPORTED_MIME_TYPES.put("text/html","htm,html,xhtml,php,asp");
+        SUPPORTED_MIME_TYPES.put("text/plain","htm,html,xhtml,php,asp,txt");
+        SUPPORTED_MIME_TYPES.put("text/sgml","htm,html,xhtml,php,asp,xml");
+    }
+    
+    public htmlParser() {
+        super();
+        this.parserName = "streaming html parser"; 
+    }
+    
+    @Override
+    public Document parse(
+            final yacyURL location, 
+            final String mimeType, 
+            final String documentCharset, 
+            final InputStream sourceStream) throws ParserException, InterruptedException {
+        
+        // make a scraper and transformer
+        final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
+        String charset = null;
+        try {
+            charset = htmlFilter.detectCharset();
+        } catch (IOException e1) {
+            throw new ParserException("Charset error:" + e1.getMessage(), location);
+        }
+        if (charset == null) {
+            charset = documentCharset;
+        } else {
+            charset = patchCharsetEncoding(charset);
+        }
+        
+        if (!documentCharset.equalsIgnoreCase(charset)) {
+            theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
+        }
+        
+        Charset c;
+        try {
+            c = Charset.forName(charset);
+        } catch (IllegalCharsetNameException e) {
+            c = Charset.defaultCharset();
+        } catch (UnsupportedCharsetException e) {
+            c = Charset.defaultCharset();
+        }
+        
+        // parsing the content
+        final ContentScraper scraper = new ContentScraper(location);        
+        final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false);
+        try {
+            FileUtils.copy(htmlFilter, writer, c);
+            writer.close();
+        } catch (IOException e) {
+            throw new ParserException("IO error:" + e.getMessage(), location);
+        }
+        //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);            
+        //serverFileUtils.copy(sourceFile, hfos);
+        //hfos.close();
+        if (writer.binarySuspect()) {
+            final String errorMsg = "Binary data found in resource";
+            theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg);
+            throw new ParserException(errorMsg,location);    
+        }
+        return transformScraper(location, mimeType, documentCharset, scraper);
+    }
+
+    private static Document transformScraper(final yacyURL location, final String mimeType, final String charSet, final ContentScraper scraper) {
+        final String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
+        int p = 0;
+        for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
+        final Document ppd =  new Document(
+                location,
+                mimeType,
+                charSet,
+                scraper.getContentLanguages(),
+                scraper.getKeywords(),
+                scraper.getTitle(),
+                scraper.getAuthor(),
+                sections,
+                scraper.getDescription(),
+                scraper.getText(),
+                scraper.getAnchors(),
+                scraper.getImages());
+        //scraper.close();            
+        ppd.setFavicon(scraper.getFavicon());
+        return ppd;
+    }
+
+
+    /**
+     * some html authors use wrong encoding names, either because they don't know exactly what they
+     * are doing or they produce a typo. Many times, the upper/lowercase scheme of the name is fuzzy
+     * This method patches wrong encoding names. The correct names are taken from
+     * http://www.iana.org/assignments/character-sets
+     * @param encoding the charset name to be patched; may be null
+     * @return patched encoding name
+     */
+    public static String patchCharsetEncoding(String encoding) {
+        
+        // fall back to the system default encoding if no usable name was given (null or shorter than 3 characters)
+        if ((encoding == null) || (encoding.length() < 3)) return Charset.defaultCharset().name();
+        
+        // trim encoding string
+        encoding = encoding.trim();
+
+        // fix upper/lowercase
+        encoding = encoding.toUpperCase();
+        if (encoding.startsWith("SHIFT")) return "Shift_JIS";
+        if (encoding.startsWith("BIG")) return "Big5";
+        // all names except those starting with "windows" are written in uppercase
+        if (encoding.startsWith("WINDOWS")) encoding = "windows" + encoding.substring(7);
+        if (encoding.startsWith("MACINTOSH")) encoding = "MacRoman";
+        
+        // fix wrong fill characters
+        encoding = encoding.replaceAll("_", "-");
+
+        if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312";
+        if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8";
+        if (encoding.startsWith("US")) return "US-ASCII";
+        if (encoding.startsWith("KOI")) return "KOI8-R";
+        
+        // patch missing '-'
+        if (encoding.startsWith("windows") && encoding.length() > 7) {
+            final char c = encoding.charAt(7);
+            if ((c >= '0') && (c <= '9')) {
+                encoding = "windows-" + encoding.substring(7);
+            }
+        }
+        
+        if (encoding.startsWith("ISO")) {
+            // patch typos: insert a missing '-' after "ISO" (index 3) and at index 8, e.g. ISO88591 -> ISO-8859-1
+            if (encoding.length() > 3) {
+                final char c = encoding.charAt(3);
+                if ((c >= '0') && (c <= '9')) {
+                    encoding = "ISO-" + encoding.substring(3);
+                }
+            }
+            if (encoding.length() > 8) {
+                final char c = encoding.charAt(8);
+                if ((c >= '0') && (c <= '9')) {
+                    encoding = encoding.substring(0, 8) + "-" + encoding.substring(8);           
+                } 
+            }
+        }
+        
+        // patch wrong name
+        if (encoding.startsWith("ISO-8559")) {
+            // popular typo
+            encoding = "ISO-8859" + encoding.substring(8);
+        }
+
+        // convert CP1250..CP1258 (with or without a separator) to windows-1250..windows-1258
+        if (encoding.matches("CP([_-])?125[0-8]")) {
+            final char c = encoding.charAt(2);
+            if ((c >= '0') && (c <= '9')) {
+                encoding = "windows-" + encoding.substring(2);
+            } else {
+                encoding = "windows" + encoding.substring(2);
+            }
+        }
+
+        return encoding;
+    }
+    
+    public Hashtable<String, String> getSupportedMimeTypes() {
+        return SUPPORTED_MIME_TYPES;
+    }
+    
+}
diff --git a/source/de/anomic/document/parser/mimeTypeParser.java b/source/de/anomic/document/parser/mimeTypeParser.java
index 58baa2f97..387d1cd7d 100644
--- a/source/de/anomic/document/parser/mimeTypeParser.java
+++ b/source/de/anomic/document/parser/mimeTypeParser.java
@@ -64,17 +64,6 @@ public class mimeTypeParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-compressed","");
     } 
     
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "commons-logging-1.1.1.jar",
-        "jmimemagic-0.1.0.jar",
-        "jakarta-oro-2.0.7.jar",
-        "log4j-1.2.9.jar"
-    };
-    
     /**
      * Helping structure used to detect loops in the mimeType detection
      * process
@@ -82,7 +71,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
     private static Hashtable<Thread, Integer> threadLoopDetection = new Hashtable<Thread, Integer>();
     
     public mimeTypeParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "MimeType Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/odtParser.java b/source/de/anomic/document/parser/odtParser.java
index 1d47f8f38..289e8d397 100644
--- a/source/de/anomic/document/parser/odtParser.java
+++ b/source/de/anomic/document/parser/odtParser.java
@@ -69,14 +69,8 @@ public class odtParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");
     }     
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"odf_utils_05_11_29.jar"};        
-    
     public odtParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "OASIS OpenDocument V2 Text Document Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/pdfParser.java b/source/de/anomic/document/parser/pdfParser.java
index 334f311cf..ce54e79da 100644
--- a/source/de/anomic/document/parser/pdfParser.java
+++ b/source/de/anomic/document/parser/pdfParser.java
@@ -56,7 +56,7 @@ public class pdfParser extends AbstractParser implements Parser {
      * a list of mime types that are supported by this parser class
      * @see #getSupportedMimeTypes()
      */
-    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();  
+    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
     static {
         SUPPORTED_MIME_TYPES.put("application/pdf","pdf");
         SUPPORTED_MIME_TYPES.put("application/x-pdf","pdf");
@@ -64,18 +64,10 @@ public class pdfParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("applications/vnd.pdf","pdf");
         SUPPORTED_MIME_TYPES.put("text/pdf","pdf");
         SUPPORTED_MIME_TYPES.put("text/x-pdf","pdf");
-    }
-    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "PDFBox-0.7.3.jar", "FontBox-0.1.0-dev.jar", "bcprov-jdk14-139.jar", "bcmail-jdk14-139.jar"
-    };        
+    }
     
     public pdfParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Acrobat Portable Document Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/pptParser.java b/source/de/anomic/document/parser/pptParser.java
index 37e333ae0..59ca84ae1 100644
--- a/source/de/anomic/document/parser/pptParser.java
+++ b/source/de/anomic/document/parser/pptParser.java
@@ -47,7 +47,7 @@ public class pptParser extends AbstractParser implements Parser {
      */
     public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
     static final String fileExtensions = "ppt,pps";
-    static { 
+    static { 
         SUPPORTED_MIME_TYPES.put("application/mspowerpoint",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/powerpoint",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/vnd.ms-powerpoint",fileExtensions);
@@ -56,19 +56,10 @@ public class pptParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/vnd-mspowerpoint",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/x-powerpoint",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/x-m",fileExtensions);
-   }     
-
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar"
-    }; 
+   }
 
     public pptParser(){
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Microsoft Powerpoint Parser";
     }
 
diff --git a/source/de/anomic/document/parser/psParser.java b/source/de/anomic/document/parser/psParser.java
index 0efae22b6..cd84998f3 100644
--- a/source/de/anomic/document/parser/psParser.java
+++ b/source/de/anomic/document/parser/psParser.java
@@ -55,20 +55,14 @@ public class psParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-postscript","ps");
         SUPPORTED_MIME_TYPES.put("application/x-ps","ps");
         SUPPORTED_MIME_TYPES.put("application/x-postscript-not-eps","ps");
-    }     
-    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};          
+    }
     
     private final static Object modeScan = new Object();
     private static boolean modeScanDone = false;
     private static String parserMode = "java";
     
     public psParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "PostScript Document Parser"; 
         if (!modeScanDone) synchronized (modeScan) {
         	if (testForPs2Ascii()) parserMode = "ps2ascii";
diff --git a/source/de/anomic/document/parser/rpmParser.java b/source/de/anomic/document/parser/rpmParser.java
index aa156e8e1..6039cd23e 100644
--- a/source/de/anomic/document/parser/rpmParser.java
+++ b/source/de/anomic/document/parser/rpmParser.java
@@ -62,16 +62,10 @@ public class rpmParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-rpm","rpm");
         SUPPORTED_MIME_TYPES.put("application/x-redhat packet manager","rpm");    
         SUPPORTED_MIME_TYPES.put("application/x-redhat-package-manager","rpm");         
-    }     
-
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"jrpm-head.jar"};        
+    }
     
     public rpmParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "rpm Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/rssParser.java b/source/de/anomic/document/parser/rssParser.java
index 552e65ca0..9cacdcc40 100644
--- a/source/de/anomic/document/parser/rssParser.java
+++ b/source/de/anomic/document/parser/rssParser.java
@@ -66,16 +66,10 @@ public class rssParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/rdf+xml",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/rss+xml",fileExtensions);
         SUPPORTED_MIME_TYPES.put("application/atom+xml",fileExtensions);
-    }     
-  
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};       
+    }
     
 	public rssParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
         this.parserName = "Rich Site Summary/Atom Feed Parser"; 
 	}
 
diff --git a/source/de/anomic/document/parser/rtfParser.java b/source/de/anomic/document/parser/rtfParser.java
index 35346b7a6..b8d0e83a3 100644
--- a/source/de/anomic/document/parser/rtfParser.java
+++ b/source/de/anomic/document/parser/rtfParser.java
@@ -55,15 +55,9 @@ public class rtfParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/doc","rtf");
         SUPPORTED_MIME_TYPES.put("application/x-soffice","rtf");
     } 
-    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
-    
+
 	public rtfParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
         this.parserName = "Rich Text Format Parser";  
 	}
 
diff --git a/source/de/anomic/document/parser/sevenzipParser.java b/source/de/anomic/document/parser/sevenzipParser.java
index 2c3b4e711..674a7b63b 100644
--- a/source/de/anomic/document/parser/sevenzipParser.java
+++ b/source/de/anomic/document/parser/sevenzipParser.java
@@ -59,16 +59,10 @@ public class sevenzipParser extends AbstractParser implements Parser {
     public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>(); 
     static { 
         SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z"); 
-    } 
-    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] { "J7Zip-modified.jar" };
+    }
     
     public sevenzipParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
         super.parserName = "7zip Archive Parser";
     }
     
diff --git a/source/de/anomic/document/parser/swfParser.java b/source/de/anomic/document/parser/swfParser.java
index 5b2dfefe0..688c53387 100644
--- a/source/de/anomic/document/parser/swfParser.java
+++ b/source/de/anomic/document/parser/swfParser.java
@@ -52,14 +52,8 @@ public class swfParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("image/vnd.rn-realflash","swf");
     }
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"webcat-0.1-swf.jar"};
-
     public swfParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Adobe Flash Parser";
     }
 
diff --git a/source/de/anomic/document/parser/tarParser.java b/source/de/anomic/document/parser/tarParser.java
index 6ef630e01..bc06a4066 100644
--- a/source/de/anomic/document/parser/tarParser.java
+++ b/source/de/anomic/document/parser/tarParser.java
@@ -69,16 +69,8 @@ public class tarParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/x-compressed","tar");
     }     
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-//        "tar.jar"
-    };    
-    
     public tarParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Tape Archive File Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/vcfParser.java b/source/de/anomic/document/parser/vcfParser.java
index ef5f054c7..c7f89c978 100644
--- a/source/de/anomic/document/parser/vcfParser.java
+++ b/source/de/anomic/document/parser/vcfParser.java
@@ -63,23 +63,17 @@ public class vcfParser extends AbstractParser implements Parser {
      */
     public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
     static { 
-        SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
+        SUPPORTED_MIME_TYPES.put("text/x-vcard","vcf");
         SUPPORTED_MIME_TYPES.put("application/vcard","vcf");
         SUPPORTED_MIME_TYPES.put("text/anytext","vcf");
         SUPPORTED_MIME_TYPES.put("text/directory","vcf");
         SUPPORTED_MIME_TYPES.put("application/x-versit","vcf");
         SUPPORTED_MIME_TYPES.put("text/x-versit","vcf");
         SUPPORTED_MIME_TYPES.put("text/x-vcalendar","vcf");
-    }
-
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};        
+    }
     
     public vcfParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "vCard Parser"; 
     }
     
diff --git a/source/de/anomic/document/parser/vsdParser.java b/source/de/anomic/document/parser/vsdParser.java
index 5072fd305..01ac2c32e 100644
--- a/source/de/anomic/document/parser/vsdParser.java
+++ b/source/de/anomic/document/parser/vsdParser.java
@@ -56,17 +56,8 @@ public class vsdParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("zz-application/zz-winassoc-vsd","vsd");
     }
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar",
-    }; 
-
     public vsdParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Microsoft Visio Parser";
     }
 
diff --git a/source/de/anomic/document/parser/xlsParser.java b/source/de/anomic/document/parser/xlsParser.java
index 62ff94943..afb73c463 100644
--- a/source/de/anomic/document/parser/xlsParser.java
+++ b/source/de/anomic/document/parser/xlsParser.java
@@ -69,17 +69,8 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
         SUPPORTED_MIME_TYPES.put("application/xls","xls");
     }     
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar"
-    }; 
-
     public xlsParser(){
-        super(LIBX_DEPENDENCIES);
+        super();
         this.parserName = "Microsoft Excel Parser";
     }
 
diff --git a/source/de/anomic/document/parser/zipParser.java b/source/de/anomic/document/parser/zipParser.java
index 013abc0dc..59cbe7b8a 100644
--- a/source/de/anomic/document/parser/zipParser.java
+++ b/source/de/anomic/document/parser/zipParser.java
@@ -69,14 +69,8 @@ public class zipParser extends AbstractParser implements Parser {
         SUPPORTED_MIME_TYPES.put("application/java-archive","jar");
     }     
 
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};        
-    
     public zipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super(); // the (previously empty) LIBX_DEPENDENCIES list is removed by this patch, so nothing is passed to the parent constructor
         this.parserName = "Compressed Archive File Parser"; 
     }
     
diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java
index 3de08bd34..cf0d969d6 100644
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@@ -81,6 +81,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.zip.GZIPOutputStream;
 
 import de.anomic.document.ParserDispatcher;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ScraperInputStream;
 import de.anomic.kelondro.util.ByteBuffer;
@@ -860,7 +861,7 @@ public final class httpdFileHandler {
                     			fis.mark(1000);
                     			// scrape document to look up charset
                     			final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
-                    			final String charset = ParserDispatcher.patchCharsetEncoding(htmlFilter.detectCharset());
+                    			final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
                     			if(charset != null)
                     				mimeType = mimeType + "; charset="+charset;
                     			// reset position
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 0b43a0265..1a01ff4e5 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -523,7 +523,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         // define a realtime parsable mimetype list
         log.logConfig("Parser: Initializing Mime Types");
         ParserDispatcher.initHTMLParsableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
-        ParserDispatcher.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_CRAWLER, null));
+        ParserDispatcher.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES, null));
         
         // start a loader
         log.logConfig("Starting Crawl Loader");
diff --git a/source/de/anomic/plasma/plasmaSwitchboardConstants.java b/source/de/anomic/plasma/plasmaSwitchboardConstants.java
index 19f635275..31a306e12 100644
--- a/source/de/anomic/plasma/plasmaSwitchboardConstants.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardConstants.java
@@ -244,11 +244,8 @@ public final class plasmaSwitchboardConstants {
     public static final String RANKING_DIST_1_METHOD            = "CRDist1Method";
     public static final String RANKING_DIST_1_PERCENT           = "CRDist1Percent";
     public static final String RANKING_DIST_1_TARGET            = "CRDist1Target";
+    public static final String PARSER_MIMETYPES                 = "parseableMimeTypes"; // config key read by plasmaSwitchboard and passed to ParserDispatcher.addParseableMimeTypes
     public static final String PARSER_MIMETYPES_HTML            = "parseableMimeTypes.HTML";
-    public static final String PARSER_MIMETYPES_PROXY           = "parseableMimeTypes.PROXY";
-    public static final String PARSER_MIMETYPES_CRAWLER         = "parseableMimeTypes.CRAWLER";
-    public static final String PARSER_MIMETYPES_ICAP            = "parseableMimeTypes.ICAP";
-    public static final String PARSER_MIMETYPES_URLREDIRECTOR   = "parseableMimeTypes.URLREDIRECTOR";
     public static final String PARSER_MIMETYPES_IMAGE           = "parseableMimeTypes.IMAGE";
     public static final String PARSER_MEDIA_EXT                 = "mediaExt";
     public static final String PARSER_MEDIA_EXT_PARSEABLE       = "parseableExt";
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index 37610886c..38b7cef27 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -56,8 +56,6 @@ import de.anomic.kelondro.util.SortStack;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.plasma.plasmaProfiling;
 import de.anomic.plasma.plasmaSwitchboard;
-import de.anomic.plasma.plasmaProfiling.searchEvent;
-import de.anomic.search.QueryEvent.ResultEntry;
 import de.anomic.server.serverProfiling;
 import de.anomic.yacy.yacyURL;
 
diff --git a/source/migration.java b/source/migration.java
index 6608ff692..4967f9144 100644
--- a/source/migration.java
+++ b/source/migration.java
@@ -233,12 +233,6 @@ public class migration {
         
         // migration for additional parser settings
         String value = "";
-        if (((value = sb.getConfig("parseableMimeTypes","")).length() > 0) && (sb.getConfig("parseableMimeTypes.CRAWLER", "").length() == 0)) {
-            sb.setConfig("parseableMimeTypes.CRAWLER", value);
-            sb.setConfig("parseableMimeTypes.PROXY", value);
-            sb.setConfig("parseableMimeTypes.URLREDIRECTOR", value);
-            sb.setConfig("parseableMimeTypes.ICAP", value);
-        }
         //Locales in DATA, because DATA must be writable, htroot not.
         if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){
         	sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot");