- some work to integrate the html parser the same way as the other parsers are integrated (not finished)

- added migration of code of settings pages (hmm.. does not work correctly yet, sorry)
- more refactoring
- removed more unused code

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6187 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · 8ca1f5d400
parent 1ee109761f
commit 8ca1f5d400
31 changed files with 283 additions and 501 deletions
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@ -249,16 +249,15 @@ minimumGlobalDelta = 500

 # the following mime-types are the whitelist for indexing
 #
-# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
-# parseableMime: specifies mime-types that can be indexed but not on the fly
-parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
+# parseableMimeTypes: specifies mime-types that can be indexed with any built-in parser
 parseableMimeTypes=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.CRAWLER=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.PROXY=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.ICAP=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
-parseableMimeTypes.URLREDIRECTOR=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
+
+# parseableMimeTypes.IMAGE: specifies mime-types that refer to image type content
 parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp

+# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
+parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
+
 # media extension string
 # a comma-separated list of extensions that denote media file formats
 # this is important to recognize <a href> - tags as not-html reference
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@ -33,7 +33,6 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@ -458,54 +457,34 @@ public class SettingsAck_p {
         */
        if (post.containsKey("parserSettings")) {
            post.remove("parserSettings");
-            /*
-            final Set<String> parserModes = ParserDispatcher.getParserConfigList().keySet();
-            final HashMap<String, HashSet<String>> newConfigList = new HashMap<String, HashSet<String>>();     
-            Iterator<String> parserModeIter = parserModes.iterator();
-            while (parserModeIter.hasNext()) {
-                final String currParserMode = parserModeIter.next();
-                newConfigList.put(currParserMode, new HashSet<String>());
-            }
            
-            // looping through all received settings
-            int pos;
+            final HashSet<String> newConfig = new HashSet<String>();
+            
+            // loop through all received settings
            final Iterator<String> keyEnum = post.keySet().iterator();
            while (keyEnum.hasNext()) {
-                final String key = keyEnum.next();
-                if ((pos = key.indexOf(".")) != -1) {
-                    final String currParserMode = key.substring(0,pos).trim().toUpperCase();
-                    final String currMimeType = key.substring(pos+1).replaceAll("\n", "");
-                    if (parserModes.contains(currParserMode)) {
-                        HashSet<String> currEnabledMimeTypes;
-                        assert (newConfigList.containsKey(currParserMode)) : "Unexpected Error";
-                        currEnabledMimeTypes = newConfigList.get(currParserMode);
-                        currEnabledMimeTypes.add(currMimeType);
-                    }
-                }
+                String key = keyEnum.next();
+                if (key.startsWith("mimename")) newConfig.add(post.get(key));
            }
            
            int enabledMimesCount = 0;
            final StringBuilder currEnabledMimesTxt = new StringBuilder();
-            parserModeIter = newConfigList.keySet().iterator();
-            while (parserModeIter.hasNext()) {                
-                final String currParserMode = parserModeIter.next();
-                final String[] enabledMimes = ParserDispatcher.setEnabledParserList(newConfigList.get(currParserMode));
-                Arrays.sort(enabledMimes);
-                
-                currEnabledMimesTxt.setLength(0);
-                for (int i=0; i < enabledMimes.length; i++) {
-                    currEnabledMimesTxt.append(enabledMimes[i]).append(",");
-                    prop.put("info_parser_" + enabledMimesCount + "_parserMode",currParserMode);
-                    prop.put("info_parser_" + enabledMimesCount + "_enabledMime",enabledMimes[i]);
-                    enabledMimesCount++;
-                }
-                if (currEnabledMimesTxt.length() > 0) currEnabledMimesTxt.deleteCharAt(currEnabledMimesTxt.length()-1);  
-                env.setConfig("parseableMimeTypes." + currParserMode,currEnabledMimesTxt.toString());
+            final String[] enabledMimes = ParserDispatcher.setEnabledParserList(newConfig);
+            Arrays.sort(enabledMimes);
+            
+            currEnabledMimesTxt.setLength(0);
+            for (int i=0; i < enabledMimes.length; i++) {
+                currEnabledMimesTxt.append(enabledMimes[i]).append(",");
+                prop.put("info_parser_" + enabledMimesCount + "_enabledMime", newConfig.toString());
+                enabledMimesCount++;
            }
+            if (currEnabledMimesTxt.length() > 0) currEnabledMimesTxt.deleteCharAt(currEnabledMimesTxt.length()-1);  
+            env.setConfig("parseableMimeTypes", currEnabledMimesTxt.toString());
+            
            prop.put("info_parser",enabledMimesCount);
            prop.put("info", "18");
            return prop;
-          */
+          
        }
        
        // Crawler settings
--- a/htroot/Settings_Parser.inc
+++ b/htroot/Settings_Parser.inc
@ -6,31 +6,27 @@
  <a href="http://www.iana.org/assignments/media-types/">http://www.iana.org/assignments/media-types/</a>
 </p>
 <table border="0" cellpadding="2" cellspacing="1">
-  <tr class="TableHeader" valign="bottom">#{parserMode}#
-    <td class="small" >#[name]#</td>#{/parserMode}#
+  <tr class="TableHeader" valign="bottom">
+    <td class="small" >enable/disable Parser</td>
    <td class="small" >Mime-Type</td>
-    <td class="small" >Parser&nbsp;Usage</td>
  </tr>#{parser}#
  <tr class="TableCellDark">
-    <td colspan="#[colspan]#">#[name]# V#[version]#</td>
+    <td colspan="#[colspan]#">#[name]#</td>
    <td>&nbsp;</td>
-    <td>#[usage]#</td>
  </tr>#{mime}#
-  <tr class="TableCellLight">#{parserMode}#
-    <td class="small" align="center"><input type="checkbox" name="#[optionName]#" #(status)#::checked="checked" #(/status)#/></td>#{/parserMode}#
+  <tr class="TableCellLight">
+    <td class="small" align="center"><input type="checkbox" mimename.#[mimetype]#="#[mimetype]#" #(status)#::checked="checked" #(/status)#/></td>
    <td class="small">#[mimetype]#</td>
-    <td class="small">&nbsp;</td>
  </tr>#{/mime}#
  #{/parser}#
-  <tr class="TableCellDark">#{parserMode}#
+  <tr class="TableCellDark">
    <td class="small" align="center">
      <input type="checkbox" name="#[name]#.allParserEnabled" onclick="javascript: ParserCheckboxes(this);" #(allParserEnabled)#::checked="checked" #(/allParserEnabled)#/>
-    </td>#{/parserMode}#
-	 <td>&nbsp;</td>    
+    </td>
    <td colspan="2" class="small">Enable all parsers</td>
  </tr>
  <tr class="TableCellDark">
-    <td colspan="#[parser.colspan]#" class="small" ><input type="submit" name="parserSettings" value="Submit" /> Changes take effect immediately</td>
+    <td colspan="2" class="small" ><input type="submit" name="parserSettings" value="Submit" /> Changes take effect immediately</td>
  </tr>
 </table>
 </fieldset>
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@ -26,11 +26,10 @@

 import java.util.Enumeration;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;

+import de.anomic.document.Parser;
 import de.anomic.document.ParserDispatcher;
-import de.anomic.document.ParserConfig;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.plasma.plasmaSwitchboard;
@ -217,47 +216,19 @@ public final class Settings_p {
        /*
         * Parser Configuration
         */
-        /*
-        final HashMap<String, plasmaParserConfig> configList = ParserDispatcher.getParserConfigList();        
-        final plasmaParserConfig[] configArray = configList.values().toArray(new plasmaParserConfig[configList.size()]);
-        
-        final HashSet<ParserInfo> parserInfos = new HashSet<ParserInfo>(ParserDispatcher.getAvailableParserList().values());
-        
-//        // fetching a list of all available mimetypes
-//        List availableParserKeys = Arrays.asList(availableParsers.entrySet().toArray(new ParserInfo[availableParsers.size()]));
-//        
-//        // sort it
-//        Collections.sort(availableParserKeys);
-        
-        // loop through the mimeTypes and add it to the properties
-        final boolean[] allParsersEnabled = new boolean[configList.size()];
-        for (int i=0; i<configArray.length; i++)
-        	allParsersEnabled[i] = true;
        int parserIdx = 0;
        
-        final Iterator<ParserInfo> availableParserIter = parserInfos.iterator();
+        final Iterator<Parser> availableParserIter = ParserDispatcher.availableParserList.values().iterator();
        while (availableParserIter.hasNext()) {
-            final ParserInfo parserInfo = availableParserIter.next();
-            prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
-            prop.putXML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
-            prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount);
-            prop.put("parser_" + parserIdx + "_colspan", configArray.length);
+            final Parser parserInfo = availableParserIter.next();
+            prop.put("parser_" + parserIdx + "_name", parserInfo.getName());
            
            int mimeIdx = 0;
-            final Enumeration<String> mimeTypeIter = parserInfo.supportedMimeTypes.keys();
+            final Enumeration<String> mimeTypeIter = parserInfo.getSupportedMimeTypes().keys();
            while (mimeTypeIter.hasMoreElements()) {
                final String mimeType = mimeTypeIter.nextElement();
-                
                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
-                //prop.put("parser_" + parserIdx + "_name", parserName);
-                //prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
-                for (int i=0; i<configArray.length; i++) {
-                    final HashSet<String> enabledParsers =  configArray[i].getEnabledParserList();
-                    prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode_" + i + "_optionName", configArray[i].parserMode + "." + mimeType);
-                    prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode_" + i + "_status", enabledParsers.contains(mimeType) ? "1" : "0");
-                    allParsersEnabled[i] &= enabledParsers.contains(mimeType);
-                }
-                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_parserMode", configArray.length);
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", (ParserDispatcher.supportedMimeTypesContains(mimeType)) ? 1 : 0);
                mimeIdx++;
            }
            prop.put("parser_" + parserIdx + "_mime", mimeIdx);
@ -265,14 +236,8 @@ public final class Settings_p {
            parserIdx++;
        }
        
-        for (int i=0; i<configArray.length; i++) {
-            prop.put("parserMode_" + i + "_name",configArray[i].parserMode);
-            prop.put("parserMode_" + i + "_allParserEnabled",allParsersEnabled[i] ? "1" : "0");
-        }
-        prop.put("parserMode",configArray.length);
        prop.put("parser", parserIdx);
-        prop.put("parser.colspan", configArray.length+2);
-        */
+        
        // Crawler settings
        prop.putHTML("crawler.clientTimeout",sb.getConfig("crawler.clientTimeout", "10000"));
        prop.putHTML("crawler.http.maxFileSize",sb.getConfig("crawler.http.maxFileSize", "-1"));
--- a/source/de/anomic/document/AbstractParser.java
+++ b/source/de/anomic/document/AbstractParser.java
@ -45,11 +45,6 @@ import de.anomic.yacy.logging.Log;
 */
 public abstract class AbstractParser implements Parser {
    
-    /**
-     * a list of library names that are needed by this parser
-     */
-    protected String[] libxDependencies = null;
-    
    /**
     * the logger class that should be used by the parser module for logging
     * purposes.
@ -70,9 +65,8 @@ public abstract class AbstractParser implements Parser {
    /**
     * The Constructor of this class.
     */
-	public AbstractParser(final String[] libxDependencies) {
+	public AbstractParser() {
 		super();
-        this.libxDependencies = libxDependencies;
 	}
    
    /**
@ -230,14 +224,6 @@ public abstract class AbstractParser implements Parser {
     */
    public abstract Document parse(yacyURL location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
    
-    /**
-     * @return Returns a list of library names that are needed by this parser
-     * @see de.anomic.document.Parser#getLibxDependences()
-     */
-    public String[] getLibxDependences() {
-        return this.libxDependencies;
-    }
-    
    /**
     * Return the name of the parser
     */
--- a/source/de/anomic/document/ParserDispatcher.java
+++ b/source/de/anomic/document/ParserDispatcher.java
@ -7,11 +7,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.UnsupportedCharsetException;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@ -26,6 +22,7 @@ import java.util.Set;
 import de.anomic.document.parser.bzipParser;
 import de.anomic.document.parser.docParser;
 import de.anomic.document.parser.gzipParser;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.document.parser.mimeTypeParser;
 import de.anomic.document.parser.odtParser;
 import de.anomic.document.parser.pdfParser;
@ -41,17 +38,13 @@ import de.anomic.document.parser.vcfParser;
 import de.anomic.document.parser.vsdParser;
 import de.anomic.document.parser.xlsParser;
 import de.anomic.document.parser.zipParser;
-import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ImageEntry;
-import de.anomic.document.parser.html.ScraperInputStream;
-import de.anomic.document.parser.html.TransformerWriter;
-import de.anomic.kelondro.util.FileUtils;
 import de.anomic.yacy.yacyURL;
 import de.anomic.yacy.logging.Log;

 public final class ParserDispatcher {
 
- private static final ParserConfig parserConfig = new ParserConfig();
+ public static final ParserConfig parserConfig = new ParserConfig();
 
 /**
  * A list containing all installed parsers and the mimeType that they support
@ -91,30 +84,6 @@ public final class ParserDispatcher {
 private static final HashSet<String> videoExtSet = new HashSet<String>();
 private static final HashSet<String> appsExtSet = new HashSet<String>();
 
- /**
-  * This {@link FilenameFilter} is used to find all classes based on there filenames 
-  * which seems to be additional content parsers.
-  * Currently the filenames of all content parser classes must end with <code>Parser.class</code> 
-  */
- /*
- private static final FilenameFilter parserFileNameFilter = new FilenameFilter() {
-     public boolean accept(File dir, String name) {
-         return name.endsWith("Parser.class");
-     }
- };
- */
- 
- /**
-  * This {@link FileFilter} is used to get all subpackages
-  * of the parser package.
-  */
- /*
- private static final FileFilter parserDirectoryFilter = new FileFilter() {
-     public boolean accept(File file) {
-         return file.isDirectory();
-     }
- };
- */    
 
 /**
  * Initializing the 
@ -146,9 +115,6 @@ public final class ParserDispatcher {
 
 private static final Log theLogger = new Log("PARSER");
 
- public Log getLogger() {
-     return theLogger;
- }
 
 /**
  * This function is used to initialize the HTMLParsableMimeTypes List.
@ -187,60 +153,43 @@ public final class ParserDispatcher {
     }
 }
 
- public static void initImageExt(final List<String> imageExtList) {
+ private static void initImageExt(final List<String> imageExtList) {
     synchronized (imageExtSet) {
         imageExtSet.addAll(imageExtList);
     }
 }
 
- public static void initAudioExt(final List<String> audioExtList) {
+ private static void initAudioExt(final List<String> audioExtList) {
     synchronized (audioExtSet) {
         audioExtSet.addAll(audioExtList);
     }
 }
 
- public static void initVideoExt(final List<String> videoExtList) {
+ private static void initVideoExt(final List<String> videoExtList) {
     synchronized (videoExtSet) {
         videoExtSet.addAll(videoExtList);
     }
 }
 
- public static void initAppsExt(final List<String> appsExtList) {
+ private static void initAppsExt(final List<String> appsExtList) {
     synchronized (appsExtSet) {
         appsExtSet.addAll(appsExtList);
     }
 }
 
- public static String getMediaExtList() {
-     synchronized (mediaExtSet) {
-         return mediaExtSet.toString();
-     }        
- }
- 
 public static void initSupportedHTMLFileExt(final List<String> supportedRealtimeFileExtList) {
     synchronized (supportedHTMLFileExt) {
         supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
     }
 }
     
- public static boolean HTMLParsableMimeTypesContains(String mimeType) {
+ private static boolean HTMLParsableMimeTypesContains(String mimeType) {
     mimeType = normalizeMimeType(mimeType);
     synchronized (supportedHTMLMimeTypes) {
         return supportedHTMLMimeTypes.contains(mimeType);
     }
 }

- public static boolean supportedHTMLContent(final yacyURL url, final String mimeType) {
-     return HTMLParsableMimeTypesContains(mimeType) && supportedHTMLFileExtContains(url);
- }    
- 
- public static boolean supportedHTMLFileExtContains(final yacyURL url) {
-     final String fileExt = getFileExt(url);
-     synchronized (supportedHTMLFileExt) {
-         return supportedHTMLFileExt.contains(fileExt);
-     }   
- }
-
 public static String getFileExt(final yacyURL url) {
     // getting the file path
     String name = url.getPath();
@ -300,81 +249,6 @@ public final class ParserDispatcher {
     }
 }

- /**
-  * some html authors use wrong encoding names, either because they don't know exactly what they
-  * are doing or they produce a type. Many times, the upper/downcase scheme of the name is fuzzy
-  * This method patches wrong encoding names. The correct names are taken from
-  * http://www.iana.org/assignments/character-sets
-  * @param encoding
-  * @return patched encoding name
-  */
- public static String patchCharsetEncoding(String encoding) {
-     
-     // return the system default encoding
-     if ((encoding == null) || (encoding.length() < 3)) return Charset.defaultCharset().name();
-     
-     // trim encoding string
-     encoding = encoding.trim();
-
-     // fix upper/lowercase
-     encoding = encoding.toUpperCase();
-     if (encoding.startsWith("SHIFT")) return "Shift_JIS";
-     if (encoding.startsWith("BIG")) return "Big5";
-     // all other names but such with "windows" use uppercase
-     if (encoding.startsWith("WINDOWS")) encoding = "windows" + encoding.substring(7);
-     if (encoding.startsWith("MACINTOSH")) encoding = "MacRoman";
-     
-     // fix wrong fill characters
-     encoding = encoding.replaceAll("_", "-");
-
-     if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312";
-     if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8";
-     if (encoding.startsWith("US")) return "US-ASCII";
-     if (encoding.startsWith("KOI")) return "KOI8-R";
-     
-     // patch missing '-'
-     if (encoding.startsWith("windows") && encoding.length() > 7) {
-         final char c = encoding.charAt(7);
-         if ((c >= '0') && (c <= '9')) {
-             encoding = "windows-" + encoding.substring(7);
-         }
-     }
-     
-     if (encoding.startsWith("ISO")) {
-         // patch typos
-         if (encoding.length() > 3) {
-             final char c = encoding.charAt(3);
-             if ((c >= '0') && (c <= '9')) {
-                 encoding = "ISO-" + encoding.substring(3);
-             }
-         }
-         if (encoding.length() > 8) {
-             final char c = encoding.charAt(8);
-             if ((c >= '0') && (c <= '9')) {
-                 encoding = encoding.substring(0, 8) + "-" + encoding.substring(8);           
-             } 
-         }
-     }
-     
-     // patch wrong name
-     if (encoding.startsWith("ISO-8559")) {
-         // popular typo
-         encoding = "ISO-8859" + encoding.substring(8);
-     }
-
-     // converting cp\d{4} -> windows-\d{4}
-     if (encoding.matches("CP([_-])?125[0-8]")) {
-         final char c = encoding.charAt(2);
-         if ((c >= '0') && (c <= '9')) {
-             encoding = "windows-" + encoding.substring(2);
-         } else {
-             encoding = "windows" + encoding.substring(2);
-         }
-     }
-
-     return encoding;
- }
- 
 public static String normalizeMimeType(String mimeType) {
     //if (mimeType == null) doMimeTypeAnalysis
     if (mimeType == null) mimeType = "application/octet-stream";
@ -519,7 +393,7 @@ public final class ParserDispatcher {
         
         // getting the charset of the document
         // TODO: do a charset detection here ....
-         final String documentCharset = patchCharsetEncoding(theDocumentCharset);
+         final String documentCharset = htmlParser.patchCharsetEncoding(theDocumentCharset);
         
         // testing if parsing is supported for this resource
         if (!supportedContent(location,mimeType)) {
@ -543,7 +417,7 @@ public final class ParserDispatcher {
             // parse the resource
             doc = theParser.parse(location, mimeType,documentCharset,sourceStream);
         } else if (HTMLParsableMimeTypesContains(mimeType)) {
-             doc = parseHtml(location, mimeType, documentCharset, sourceStream);
+             doc = new htmlParser().parse(location, mimeType, documentCharset, sourceStream);
         } else {
             final String errorMsg = "No parser available to parse mimetype '" + mimeType + "' (2)";
             theLogger.logInfo("Unable to parse '" + location + "'. " + errorMsg);
@ -558,17 +432,6 @@ public final class ParserDispatcher {
         }
         return doc;
         
-     } catch (final UnsupportedEncodingException e) {
-         final String errorMsg = "unsupported charset encoding: " + e.getMessage();
-         theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
-         throw new ParserException(errorMsg,location, errorMsg);                 
-     } catch (final IOException e) {
-         // IOExceptions may occur during html parsing when a server closes the connection during reading.
-         // This may happen here, because the html parser is a streaming parser
-         // that produces surrogates while the connection is active
-         final String errorMsg = "IOException - server may have closed the connection. " + e.getMessage();
-         theLogger.logWarning("Unable to parse '" + location + "'. " + errorMsg);
-         throw new ParserException(errorMsg, location, errorMsg);
     } catch (final Exception e) {
         // Interrupted- and Parser-Exceptions should pass through
         if (e instanceof InterruptedException) throw (InterruptedException) e;
@ -586,71 +449,8 @@ public final class ParserDispatcher {
     }        
 }
 
- private static Document parseHtml(
-         final yacyURL location, 
-         final String mimeType, 
-         final String documentCharset, 
-         final InputStream sourceStream) throws IOException, ParserException {
 
-     // make a scraper and transformer
-     final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
-     String charset = htmlFilter.detectCharset();
-     if (charset == null) {
-         charset = documentCharset;
-     } else {
-         charset = patchCharsetEncoding(charset);
-     }
-     
-     if (!documentCharset.equalsIgnoreCase(charset)) {
-         theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
-     }

-     Charset c;
-     try {
-         c = Charset.forName(charset);
-     } catch (IllegalCharsetNameException e) {
-         c = Charset.defaultCharset();
-     } catch (UnsupportedCharsetException e) {
-         c = Charset.defaultCharset();
-     }
-     
-     // parsing the content
-     final ContentScraper scraper = new ContentScraper(location);        
-     final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false);
-     FileUtils.copy(htmlFilter, writer, c);
-     writer.close();
-     //OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);            
-     //serverFileUtils.copy(sourceFile, hfos);
-     //hfos.close();
-     if (writer.binarySuspect()) {
-         final String errorMsg = "Binary data found in resource";
-         theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg);
-         throw new ParserException(errorMsg,location);    
-     }
-     return transformScraper(location, mimeType, documentCharset, scraper);
- }
- 
- public static Document transformScraper(final yacyURL location, final String mimeType, final String charSet, final ContentScraper scraper) {
-     final String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
-     int p = 0;
-     for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
-     final Document ppd =  new Document(
-             location,
-             mimeType,
-             charSet,
-             scraper.getContentLanguages(),
-             scraper.getKeywords(),
-             scraper.getTitle(),
-             scraper.getAuthor(),
-             sections,
-             scraper.getDescription(),
-             scraper.getText(),
-             scraper.getAnchors(),
-             scraper.getImages());
-     //scraper.close();            
-     ppd.setFavicon(scraper.getFavicon());
-     return ppd;
- }
 
 /**
  * This function is used to determine the parser class that should be used for a given
--- a/source/de/anomic/document/parser/bzipParser.java
+++ b/source/de/anomic/document/parser/bzipParser.java
@ -60,14 +60,8 @@ public class bzipParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-stuffit",fileExtensions);
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};
-    
    public bzipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Bzip 2 UNIX Compressed File Parser";
    }
    
--- a/source/de/anomic/document/parser/docParser.java
+++ b/source/de/anomic/document/parser/docParser.java
@ -58,16 +58,8 @@ public class docParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-msword","doc");
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "tm-extractors-1.0.jar"
-    };    
-    
 	public docParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
        this.parserName = "Word Document Parser";
 	}

--- a/source/de/anomic/document/parser/gzipParser.java
+++ b/source/de/anomic/document/parser/gzipParser.java
@ -62,14 +62,8 @@ public class gzipParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-tar",fileExtensions);
    }     

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
-    
    public gzipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "GNU Zip Compressed Archive Parser";
    }
    
--- a/source/de/anomic/document/parser/html/ContentScraper.java
+++ b/source/de/anomic/document/parser/html/ContentScraper.java
@ -45,7 +45,7 @@ import java.util.Properties;
 import javax.swing.event.EventListenerList;

 import de.anomic.crawler.HTTPLoader;
-import de.anomic.document.ParserDispatcher;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.http.httpClient;
 import de.anomic.http.httpHeader;
 import de.anomic.http.httpRequestHeader;
@ -501,7 +501,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        
        // scrape document to look up charset
        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page),"UTF-8",new yacyURL("http://localhost", null),null,false);
-        final String charset = ParserDispatcher.patchCharsetEncoding(htmlFilter.detectCharset());
+        final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
        
        // scrape content
        final ContentScraper scraper = new ContentScraper(new yacyURL("http://localhost", null));
--- a/source/de/anomic/document/parser/htmlParser.java
+++ b/source/de/anomic/document/parser/htmlParser.java
@ -0,0 +1,195 @@
+package de.anomic.document.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Hashtable;
+import java.util.Locale;
+
+import de.anomic.document.AbstractParser;
+import de.anomic.document.Document;
+import de.anomic.document.Parser;
+import de.anomic.document.ParserException;
+import de.anomic.document.parser.html.ContentScraper;
+import de.anomic.document.parser.html.ScraperInputStream;
+import de.anomic.document.parser.html.TransformerWriter;
+import de.anomic.kelondro.util.FileUtils;
+import de.anomic.yacy.yacyURL;
+
+public class htmlParser extends AbstractParser implements Parser {
+
+    /**
+     * Maps every mime type handled by this parser class to a comma-separated
+     * list of file extensions.
+     * @see #getSupportedMimeTypes()
+     */
+    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
+    static {
+        // extensions shared by all html-like mime types
+        final String htmlExtensions = "htm,html,xhtml,php,asp";
+        SUPPORTED_MIME_TYPES.put("application/xhtml+xml", htmlExtensions);
+        SUPPORTED_MIME_TYPES.put("text/html", htmlExtensions);
+        SUPPORTED_MIME_TYPES.put("text/plain", htmlExtensions + ",txt");
+        SUPPORTED_MIME_TYPES.put("text/sgml", htmlExtensions + ",xml");
+    }
+    
+    /**
+     * Creates the parser and sets its parser name.
+     */
+    public htmlParser() {
+        super();
+        parserName = "streaming html parser";
+    }
+    
+    /**
+     * Parses an html resource by streaming it through a {@link ContentScraper}.
+     * <p>
+     * The charset used for decoding is taken from the stream itself
+     * ({@link ScraperInputStream#detectCharset()}) and normalized with
+     * {@link #patchCharsetEncoding(String)}. If the stream declares no charset,
+     * <code>documentCharset</code> is used instead. If no usable charset name
+     * remains (null, illegal or unsupported name), the platform default charset
+     * is used.
+     *
+     * @param location url of the resource; passed to the scraper and used in error messages
+     * @param mimeType mime type stored in the resulting document
+     * @param documentCharset charset announced for the resource; stored unchanged in the
+     *        resulting document. A <code>null</code> value no longer causes a
+     *        NullPointerException in this method.
+     *        NOTE(review): whether ScraperInputStream itself accepts null is not visible here - confirm
+     * @param sourceStream the raw content of the resource
+     * @return the document built from the scraped content
+     * @throws ParserException if charset detection or copying fails with an IO error,
+     *         or if the content is suspected to be binary
+     * @throws InterruptedException part of the declared signature; nothing in this
+     *         method body throws it directly
+     */
+    @Override
+    public Document parse(
+            final yacyURL location,
+            final String mimeType,
+            final String documentCharset,
+            final InputStream sourceStream) throws ParserException, InterruptedException {
+
+        // make a scraper input stream and look for a charset declared inside the document
+        final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream, documentCharset, location, null, false);
+        String charset;
+        try {
+            charset = htmlFilter.detectCharset();
+        } catch (final IOException e) {
+            throw new ParserException("Charset error:" + e.getMessage(), location);
+        }
+        if (charset == null) {
+            // nothing declared in the document: fall back to the announced charset (may be null)
+            charset = documentCharset;
+        } else {
+            charset = patchCharsetEncoding(charset);
+        }
+
+        // only report a transformation when there is an announced charset to compare with
+        if (documentCharset != null && !documentCharset.equalsIgnoreCase(charset)) {
+            theLogger.logInfo("Charset transformation needed from '" + documentCharset + "' to '" + charset + "' for URL = " + location.toNormalform(true, true));
+        }
+
+        // resolve the charset name; every unusable name ends in the platform default
+        Charset c;
+        if (charset == null) {
+            // Charset.forName(null) would throw a plain IllegalArgumentException
+            c = Charset.defaultCharset();
+        } else {
+            try {
+                c = Charset.forName(charset);
+            } catch (final IllegalCharsetNameException e) {
+                c = Charset.defaultCharset();
+            } catch (final UnsupportedCharsetException e) {
+                c = Charset.defaultCharset();
+            }
+        }
+
+        // parsing the content
+        final ContentScraper scraper = new ContentScraper(location);
+        final TransformerWriter writer = new TransformerWriter(null, null, scraper, null, false);
+        try {
+            FileUtils.copy(htmlFilter, writer, c);
+            writer.close();
+        } catch (final IOException e) {
+            throw new ParserException("IO error:" + e.getMessage(), location);
+        }
+        if (writer.binarySuspect()) {
+            final String errorMsg = "Binary data found in resource";
+            theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg);
+            throw new ParserException(errorMsg, location);
+        }
+        return transformScraper(location, mimeType, documentCharset, scraper);
+    }
+
+    /**
+     * Builds a {@link Document} from the content collected by a scraper.
+     * <p>
+     * The headlines of levels 1 to 4 are concatenated, level by level, into the
+     * section list of the document. Each level is fetched from the scraper
+     * exactly once.
+     *
+     * @param location url of the scraped resource
+     * @param mimeType mime type stored in the document
+     * @param charSet charset name stored in the document
+     * @param scraper the scraper holding the collected content
+     * @return the new document, with the favicon taken from the scraper
+     */
+    private static Document transformScraper(final yacyURL location, final String mimeType, final String charSet, final ContentScraper scraper) {
+        // fetch every headline level once and count the total number of sections
+        final String[][] headlines = new String[4][];
+        int total = 0;
+        for (int level = 1; level <= 4; level++) {
+            headlines[level - 1] = scraper.getHeadlines(level);
+            total += headlines[level - 1].length;
+        }
+
+        // concatenate the levels in ascending order
+        final String[] sections = new String[total];
+        int p = 0;
+        for (final String[] levelHeadlines : headlines) {
+            System.arraycopy(levelHeadlines, 0, sections, p, levelHeadlines.length);
+            p += levelHeadlines.length;
+        }
+
+        final Document ppd = new Document(
+                location,
+                mimeType,
+                charSet,
+                scraper.getContentLanguages(),
+                scraper.getKeywords(),
+                scraper.getTitle(),
+                scraper.getAuthor(),
+                sections,
+                scraper.getDescription(),
+                scraper.getText(),
+                scraper.getAnchors(),
+                scraper.getImages());
+        ppd.setFavicon(scraper.getFavicon());
+        return ppd;
+    }
+
+
+    /**
+     * Some html authors use wrong encoding names, either because they don't know exactly what
+     * they are doing or because they produce a typo. Many times the upper/lowercase scheme of
+     * the name is fuzzy. This method patches wrong encoding names. The correct names are taken
+     * from http://www.iana.org/assignments/character-sets
+     * <p>
+     * The name is trimmed before it is checked, and case conversion is done
+     * independently of the default locale, so the result is the same on every platform.
+     *
+     * @param encoding the encoding name as found in the document, may be <code>null</code>
+     * @return the patched encoding name; the name of the system default charset if
+     *         <code>encoding</code> is <code>null</code> or shorter than 3 characters after trimming
+     */
+    public static String patchCharsetEncoding(String encoding) {
+
+        // return the system default encoding
+        if (encoding == null) return Charset.defaultCharset().name();
+
+        // trim encoding string before judging its length
+        encoding = encoding.trim();
+        if (encoding.length() < 3) return Charset.defaultCharset().name();
+
+        // fix upper/lowercase; must not depend on the default locale
+        // (a Turkish locale would map 'i' to a dotted capital I and break all prefix checks)
+        encoding = encoding.toUpperCase(Locale.ENGLISH);
+        if (encoding.startsWith("SHIFT")) return "Shift_JIS";
+        if (encoding.startsWith("BIG")) return "Big5";
+        // none of the patches below applies to this name, so return it right away
+        if (encoding.startsWith("MACINTOSH")) return "MacRoman";
+        // all other names but such with "windows" use uppercase
+        if (encoding.startsWith("WINDOWS")) encoding = "windows" + encoding.substring(7);
+
+        // fix wrong fill characters
+        encoding = encoding.replace('_', '-');
+
+        if (encoding.matches("GB[_-]?2312([-_]80)?")) return "GB2312";
+        if (encoding.matches(".*UTF[-_]?8.*")) return "UTF-8";
+        if (encoding.startsWith("US")) return "US-ASCII";
+        if (encoding.startsWith("KOI")) return "KOI8-R";
+
+        // patch missing '-'
+        if (encoding.startsWith("windows") && encoding.length() > 7) {
+            final char c = encoding.charAt(7);
+            if ((c >= '0') && (c <= '9')) {
+                encoding = "windows-" + encoding.substring(7);
+            }
+        }
+
+        if (encoding.startsWith("ISO")) {
+            // patch typos: missing '-' after "ISO" and before the part number
+            if (encoding.length() > 3) {
+                final char c = encoding.charAt(3);
+                if ((c >= '0') && (c <= '9')) {
+                    encoding = "ISO-" + encoding.substring(3);
+                }
+            }
+            if (encoding.length() > 8) {
+                final char c = encoding.charAt(8);
+                if ((c >= '0') && (c <= '9')) {
+                    encoding = encoding.substring(0, 8) + "-" + encoding.substring(8);
+                }
+            }
+        }
+
+        // patch wrong name
+        if (encoding.startsWith("ISO-8559")) {
+            // popular typo
+            encoding = "ISO-8859" + encoding.substring(8);
+        }
+
+        // converting cp\d{4} -> windows-\d{4}
+        if (encoding.matches("CP([_-])?125[0-8]")) {
+            final char c = encoding.charAt(2);
+            if ((c >= '0') && (c <= '9')) {
+                encoding = "windows-" + encoding.substring(2);
+            } else {
+                // the separator is already part of the remainder
+                encoding = "windows" + encoding.substring(2);
+            }
+        }
+
+        return encoding;
+    }
+    
+    /**
+     * @return the mime types supported by this parser class; this is the shared
+     *         {@link #SUPPORTED_MIME_TYPES} table itself, not a copy
+     */
+    public Hashtable<String, String> getSupportedMimeTypes() {
+        return htmlParser.SUPPORTED_MIME_TYPES;
+    }
+    
+}
--- a/source/de/anomic/document/parser/mimeTypeParser.java
+++ b/source/de/anomic/document/parser/mimeTypeParser.java
@ -64,17 +64,6 @@ public class mimeTypeParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-compressed","");
    } 
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "commons-logging-1.1.1.jar",
-        "jmimemagic-0.1.0.jar",
-        "jakarta-oro-2.0.7.jar",
-        "log4j-1.2.9.jar"
-    };
-    
    /**
     * Helping structure used to detect loops in the mimeType detection
     * process
@ -82,7 +71,7 @@ public class mimeTypeParser extends AbstractParser implements Parser {
    private static Hashtable<Thread, Integer> threadLoopDetection = new Hashtable<Thread, Integer>();
    
    public mimeTypeParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "MimeType Parser"; 
    }
    
--- a/source/de/anomic/document/parser/odtParser.java
+++ b/source/de/anomic/document/parser/odtParser.java
@ -69,14 +69,8 @@ public class odtParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");
    }     

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"odf_utils_05_11_29.jar"};        
-    
    public odtParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "OASIS OpenDocument V2 Text Document Parser"; 
    }
    
--- a/source/de/anomic/document/parser/pdfParser.java
+++ b/source/de/anomic/document/parser/pdfParser.java
@ -66,16 +66,8 @@ public class pdfParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("text/x-pdf","pdf");
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "PDFBox-0.7.3.jar", "FontBox-0.1.0-dev.jar", "bcprov-jdk14-139.jar", "bcmail-jdk14-139.jar"
-    };        
-    
    public pdfParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Acrobat Portable Document Parser"; 
    }
    
--- a/source/de/anomic/document/parser/pptParser.java
+++ b/source/de/anomic/document/parser/pptParser.java
@ -58,17 +58,8 @@ public class pptParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-m",fileExtensions);
   }

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar"
-    }; 
-
    public pptParser(){
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Microsoft Powerpoint Parser";
    }

--- a/source/de/anomic/document/parser/psParser.java
+++ b/source/de/anomic/document/parser/psParser.java
@ -57,18 +57,12 @@ public class psParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-postscript-not-eps","ps");
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};          
-    
    private final static Object modeScan = new Object();
    private static boolean modeScanDone = false;
    private static String parserMode = "java";
    
    public psParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "PostScript Document Parser"; 
        if (!modeScanDone) synchronized (modeScan) {
        	if (testForPs2Ascii()) parserMode = "ps2ascii";
--- a/source/de/anomic/document/parser/rpmParser.java
+++ b/source/de/anomic/document/parser/rpmParser.java
@ -64,14 +64,8 @@ public class rpmParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-redhat-package-manager","rpm");         
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"jrpm-head.jar"};        
-    
    public rpmParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "rpm Parser"; 
    }
    
--- a/source/de/anomic/document/parser/rssParser.java
+++ b/source/de/anomic/document/parser/rssParser.java
@ -68,14 +68,8 @@ public class rssParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/atom+xml",fileExtensions);
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};       
-    
 	public rssParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
        this.parserName = "Rich Site Summary/Atom Feed Parser"; 
 	}

--- a/source/de/anomic/document/parser/rtfParser.java
+++ b/source/de/anomic/document/parser/rtfParser.java
@ -56,14 +56,8 @@ public class rtfParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-soffice","rtf");
    } 

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
-    
 	public rtfParser() {
-		super(LIBX_DEPENDENCIES);
+		super();
        this.parserName = "Rich Text Format Parser";  
 	}

--- a/source/de/anomic/document/parser/sevenzipParser.java
+++ b/source/de/anomic/document/parser/sevenzipParser.java
@ -61,14 +61,8 @@ public class sevenzipParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-7z-compressed", "7z"); 
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] { "J7Zip-modified.jar" };
-    
    public sevenzipParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
        super.parserName = "7zip Archive Parser";
    }
    
--- a/source/de/anomic/document/parser/swfParser.java
+++ b/source/de/anomic/document/parser/swfParser.java
@ -52,14 +52,8 @@ public class swfParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("image/vnd.rn-realflash","swf");
    }

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"webcat-0.1-swf.jar"};
-
    public swfParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Adobe Flash Parser";
    }

--- a/source/de/anomic/document/parser/tarParser.java
+++ b/source/de/anomic/document/parser/tarParser.java
@ -69,16 +69,8 @@ public class tarParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/x-compressed","tar");
    }     

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-//        "tar.jar"
-    };    
-    
    public tarParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Tape Archive File Parser"; 
    }
    
--- a/source/de/anomic/document/parser/vcfParser.java
+++ b/source/de/anomic/document/parser/vcfParser.java
@ -72,14 +72,8 @@ public class vcfParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("text/x-vcalendar","vcf");
    }
    
-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};        
-    
    public vcfParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "vCard Parser"; 
    }
    
--- a/source/de/anomic/document/parser/vsdParser.java
+++ b/source/de/anomic/document/parser/vsdParser.java
@ -56,17 +56,8 @@ public class vsdParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("zz-application/zz-winassoc-vsd","vsd");
    }

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar",
-    }; 
-
    public vsdParser() {
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Microsoft Visio Parser";
    }

--- a/source/de/anomic/document/parser/xlsParser.java
+++ b/source/de/anomic/document/parser/xlsParser.java
@ -69,17 +69,8 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
        SUPPORTED_MIME_TYPES.put("application/xls","xls");
    }     

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {
-        "poi-3.2-FINAL-20081019.jar",
-        "poi-scratchpad-3.2-FINAL-20081019.jar"
-    }; 
-
    public xlsParser(){
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Microsoft Excel Parser";
    }

--- a/source/de/anomic/document/parser/zipParser.java
+++ b/source/de/anomic/document/parser/zipParser.java
@ -69,14 +69,8 @@ public class zipParser extends AbstractParser implements Parser {
        SUPPORTED_MIME_TYPES.put("application/java-archive","jar");
    }     

-    /**
-     * a list of library names that are needed by this parser
-     * @see Parser#getLibxDependences()
-     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};        
-    
    public zipParser() {        
-        super(LIBX_DEPENDENCIES);
+        super();
        this.parserName = "Compressed Archive File Parser"; 
    }
    
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@ -81,6 +81,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.zip.GZIPOutputStream;

 import de.anomic.document.ParserDispatcher;
+import de.anomic.document.parser.htmlParser;
 import de.anomic.document.parser.html.ContentScraper;
 import de.anomic.document.parser.html.ScraperInputStream;
 import de.anomic.kelondro.util.ByteBuffer;
@ -860,7 +861,7 @@ public final class httpdFileHandler {
                    			fis.mark(1000);
                    			// scrape document to look up charset
                    			final ScraperInputStream htmlFilter = new ScraperInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
-                    			final String charset = ParserDispatcher.patchCharsetEncoding(htmlFilter.detectCharset());
+                    			final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
                    			if(charset != null)
                    				mimeType = mimeType + "; charset="+charset;
                    			// reset position
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@ -523,7 +523,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
        // define a realtime parsable mimetype list
        log.logConfig("Parser: Initializing Mime Types");
        ParserDispatcher.initHTMLParsableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
-        ParserDispatcher.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES_CRAWLER, null));
+        ParserDispatcher.addParseableMimeTypes(getConfig(plasmaSwitchboardConstants.PARSER_MIMETYPES, null));
        
        // start a loader
        log.logConfig("Starting Crawl Loader");
--- a/source/de/anomic/plasma/plasmaSwitchboardConstants.java
+++ b/source/de/anomic/plasma/plasmaSwitchboardConstants.java
@ -244,11 +244,8 @@ public final class plasmaSwitchboardConstants {
    public static final String RANKING_DIST_1_METHOD            = "CRDist1Method";
    public static final String RANKING_DIST_1_PERCENT           = "CRDist1Percent";
    public static final String RANKING_DIST_1_TARGET            = "CRDist1Target";
+    public static final String PARSER_MIMETYPES                 = "parseableMimeTypes";
    public static final String PARSER_MIMETYPES_HTML            = "parseableMimeTypes.HTML";
-    public static final String PARSER_MIMETYPES_PROXY           = "parseableMimeTypes.PROXY";
-    public static final String PARSER_MIMETYPES_CRAWLER         = "parseableMimeTypes.CRAWLER";
-    public static final String PARSER_MIMETYPES_ICAP            = "parseableMimeTypes.ICAP";
-    public static final String PARSER_MIMETYPES_URLREDIRECTOR   = "parseableMimeTypes.URLREDIRECTOR";
    public static final String PARSER_MIMETYPES_IMAGE           = "parseableMimeTypes.IMAGE";
    public static final String PARSER_MEDIA_EXT                 = "mediaExt";
    public static final String PARSER_MEDIA_EXT_PARSEABLE       = "parseableExt";
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@ -56,8 +56,6 @@ import de.anomic.kelondro.util.SortStack;
 import de.anomic.kelondro.util.FileUtils;
 import de.anomic.plasma.plasmaProfiling;
 import de.anomic.plasma.plasmaSwitchboard;
-import de.anomic.plasma.plasmaProfiling.searchEvent;
-import de.anomic.search.QueryEvent.ResultEntry;
 import de.anomic.server.serverProfiling;
 import de.anomic.yacy.yacyURL;

--- a/source/migration.java
+++ b/source/migration.java
@ -233,12 +233,6 @@ public class migration {
        
        // migration for additional parser settings
        String value = "";
-        if (((value = sb.getConfig("parseableMimeTypes","")).length() > 0) && (sb.getConfig("parseableMimeTypes.CRAWLER", "").length() == 0)) {
-            sb.setConfig("parseableMimeTypes.CRAWLER", value);
-            sb.setConfig("parseableMimeTypes.PROXY", value);
-            sb.setConfig("parseableMimeTypes.URLREDIRECTOR", value);
-            sb.setConfig("parseableMimeTypes.ICAP", value);
-        }
        //Locales in DATA, because DATA must be writable, htroot not.
        if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){
        	sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot");