From af10f729df9920961e40d4304280c8f900e1192a Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Thu, 22 Nov 2007 01:34:29 +0000
Subject: [PATCH] fixed image search and favicon loading

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4225 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/http/httpdProxyHandler.java  |   6 +-
 .../plasma/crawler/plasmaCrawlQueues.java     |   5 +-
 .../plasma/crawler/plasmaHTTPLoader.java      |  10 +-
 .../plasma/crawler/plasmaProtocolLoader.java  |   8 +-
 source/de/anomic/plasma/plasmaCrawlEntry.java |   6 +-
 source/de/anomic/plasma/plasmaParser.java     | 162 ++++++------------
 .../de/anomic/plasma/plasmaParserConfig.java  |  12 +-
 .../de/anomic/plasma/plasmaSwitchboard.java   |  16 +-
 yacy.init                                     |   5 +-
 9 files changed, 89 insertions(+), 141 deletions(-)

diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index 1fe76fb47..c51f8bfcf 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -581,7 +581,7 @@ public final class httpdProxyHandler {
             // handle file types and make (possibly transforming) output stream
             if (
                     (!transformer.isIdentityTransformer()) &&
-                    (plasmaParser.supportedRealTimeContent(url,res.responseHeader.mime()))
+                    (plasmaParser.supportedHTMLContent(url,res.responseHeader.mime()))
                 ) {
                 // make a transformer
                 theLogger.logFine("create transformer for URL " + url);
@@ -794,8 +794,8 @@ public final class httpdProxyHandler {
                 
                 // make a transformer
                 if (( !transformer.isIdentityTransformer()) &&
-                        (ext == null || !plasmaParser.supportedRealtimeFileExtContains(url)) &&
-                        (plasmaParser.realtimeParsableMimeTypesContains(cachedResponseHeader.mime()))) {
+                        (ext == null || !plasmaParser.supportedHTMLFileExtContains(url)) &&
+                        (plasmaParser.HTMLParsableMimeTypesContains(cachedResponseHeader.mime()))) {
                     hfos = new htmlFilterWriter((chunkedOut != null) ? chunkedOut : respond, charSet, null, transformer, (ext.length() == 0));
                 } else {
                     hfos = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
index f72d3bce8..acc3128e0 100644
--- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
+++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java
@@ -39,6 +39,7 @@ import de.anomic.plasma.plasmaCrawlNURL;
 import de.anomic.plasma.plasmaCrawlProfile;
 import de.anomic.plasma.plasmaCrawlZURL;
 import de.anomic.plasma.plasmaHTCache;
+import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.logging.serverLog;
 import de.anomic.tools.crypt;
@@ -518,7 +519,7 @@ public class plasmaCrawlQueues {
                 0, 
                 0);
         
-        return loader.load(centry);
+        return loader.load(centry, (forText) ? plasmaParser.PARSER_MODE_CRAWLER : plasmaParser.PARSER_MODE_IMAGE);
     }
     
     public int size() {
@@ -547,7 +548,7 @@ public class plasmaCrawlQueues {
                 } else {
                     // starting a load from the internet
                     this.entry.setStatus("worker-loading");
-                    String result = loader.process(this.entry);
+                    String result = loader.process(this.entry, plasmaParser.PARSER_MODE_CRAWLER);
                     if (result != null) {
                         plasmaCrawlZURL.Entry eentry = errorURL.newEntry(this.entry.url(), "cannot load: " + result);
                         eentry.store();
diff --git a/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java b/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java
index 4c0309bf4..078051f96 100644
--- a/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java
+++ b/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java
@@ -130,11 +130,11 @@ public final class plasmaHTTPLoader {
         );
     }    
    
-    public plasmaHTCache.Entry load(plasmaCrawlEntry entry) {
-        return load(entry, DEFAULT_CRAWLING_RETRY_COUNT);
+    public plasmaHTCache.Entry load(plasmaCrawlEntry entry, String parserMode) {
+        return load(entry, parserMode, DEFAULT_CRAWLING_RETRY_COUNT);
     }
     
-    private plasmaHTCache.Entry load(plasmaCrawlEntry entry, int retryCount) {
+    private plasmaHTCache.Entry load(plasmaCrawlEntry entry, String parserMode, int retryCount) {
 
         if (retryCount < 0) {
             this.log.logInfo("Redirection counter exceeded for URL " + entry.url().toString() + ". Processing aborted.");
@@ -212,7 +212,7 @@ public final class plasmaHTTPLoader {
                 // request has been placed and result has been returned. work off response
                 File cacheFile = plasmaHTCache.getCachePath(entry.url());
                 try {
-                    if (plasmaParser.supportedContent(plasmaParser.PARSER_MODE_CRAWLER,entry.url(),res.responseHeader.mime())) {
+                    if (plasmaParser.supportedContent(parserMode, entry.url(), res.responseHeader.mime())) {
                         // delete old content
                         if (cacheFile.isFile()) {
                             plasmaHTCache.deleteURLfromCache(entry.url());
@@ -310,7 +310,7 @@ public final class plasmaHTTPLoader {
                         
                         // retry crawling with new url
                         entry.redirectURL(redirectionUrl);
-                        return load(entry, retryCount - 1);
+                        return load(entry, parserMode, retryCount - 1); // a redirect must keep the caller's parser mode (e.g. IMAGE)
                         
                     }
             } else {
diff --git a/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java b/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java
index 1cbb3645c..a184213f2 100644
--- a/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java
+++ b/source/de/anomic/plasma/crawler/plasmaProtocolLoader.java
@@ -61,23 +61,23 @@ public final class plasmaProtocolLoader {
         return (HashSet) this.supportedProtocols.clone();
     }
     
-    public plasmaHTCache.Entry load(plasmaCrawlEntry entry) {
+    public plasmaHTCache.Entry load(plasmaCrawlEntry entry, String parserMode) {
         // getting the protocol of the next URL                
         String protocol = entry.url().getProtocol();
         
-        if ((protocol.equals("http") || (protocol.equals("https")))) return httpLoader.load(entry);
+        if ((protocol.equals("http") || (protocol.equals("https")))) return httpLoader.load(entry, parserMode);
         if (protocol.equals("ftp")) return ftpLoader.load(entry);
         
         this.log.logWarning("Unsupported protocol '" + protocol + "' in url " + entry.url());
         return null;
     }
     
-    public String process(plasmaCrawlEntry entry) {
+    public String process(plasmaCrawlEntry entry, String parserMode) {
         // load a resource, store it to htcache and push queue entry to switchboard queue
         // returns null if everything went fine, a fail reason string if a problem occurred
         plasmaHTCache.Entry h;
         try {
-            h = load(entry);
+            h = load(entry, parserMode);
             entry.setStatus("loaded");
             if (h == null) return "load failed";
             boolean stored = sb.htEntryStoreProcess(h);
diff --git a/source/de/anomic/plasma/plasmaCrawlEntry.java b/source/de/anomic/plasma/plasmaCrawlEntry.java
index 712281286..efc53bf79 100644
--- a/source/de/anomic/plasma/plasmaCrawlEntry.java
+++ b/source/de/anomic/plasma/plasmaCrawlEntry.java
@@ -1,6 +1,6 @@
-// plasmaCrawlBalancerEntry.java
-// (C) 2007 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
-// first published 14.03.2007 on http://www.anomic.de
+// plasmaCrawlEntry.java
+// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 14.03.2007 on http://yacy.net
 //
 // This is a part of YaCy, a peer-to-peer based web search engine
 //
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index 6efd711dc..8aa48d51d 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -1,12 +1,16 @@
 // plasmaParser.java 
-// ------------------------
-// part of YaCy
-// (C) by Michael Peter Christen; mc@anomic.de
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2005
+// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published in January 2005 on http://yacy.net
+// with contributions 02.05.2005 by Martin Thelian
 //
-// last major change: 02.05.2005 by Martin Thelian
+// This is a part of YaCy, a peer-to-peer based web search engine
 //
+// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
+// $LastChangedRevision: 1986 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+// 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation; either version 2 of the License, or
@@ -20,27 +24,6 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//
-// Using this software in any meaning (reading, learning, copying, compiling,
-// running) means that you agree that the Author(s) is (are) not responsible
-// for cost, loss of data or any harm that may be caused directly or indirectly
-// by usage of this softare or this documentation. The usage of this software
-// is on your own risk. The installation and usage (starting/running) of this
-// software may allow other people or application to access your computer and
-// any attached devices and is highly dependent on the configuration of the
-// software which must be done by the user of the software; the author(s) is
-// (are) also not responsible for proper configuration and usage of the
-// software, even if provoked by documentation provided together with
-// the software.
-//
-// Any changes to this file according to the GPL as documented in the file
-// gpl.txt aside this file in the shipment you received can be done to the
-// lines that follows this copyright notice here, but changes must not be
-// done inside the copyright notive above. A re-distribution must contain
-// the intact and unchanged copyright notice.
-// Contributions and changes to the program code must be marked as such.
-
-// compile: javac -classpath lib/commons-collections.jar:lib/commons-pool.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
 
 package de.anomic.plasma;
 
@@ -78,15 +61,17 @@ import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacyURL;
 
 public final class plasmaParser {
-    public static final String PARSER_MODE_PROXY   = "PROXY";
-    public static final String PARSER_MODE_CRAWLER = "CRAWLER";
+    public static final String PARSER_MODE_PROXY         = "PROXY";
+    public static final String PARSER_MODE_CRAWLER       = "CRAWLER";
     public static final String PARSER_MODE_URLREDIRECTOR = "URLREDIRECTOR";
-    public static final String PARSER_MODE_ICAP = "ICAP";
+    public static final String PARSER_MODE_ICAP          = "ICAP";
+    public static final String PARSER_MODE_IMAGE         = "IMAGE";
     public static final HashSet PARSER_MODE = new HashSet(Arrays.asList(new String[]{
             PARSER_MODE_PROXY,
             PARSER_MODE_CRAWLER,
             PARSER_MODE_ICAP,
-            PARSER_MODE_URLREDIRECTOR
+            PARSER_MODE_URLREDIRECTOR,
+            PARSER_MODE_IMAGE
     }));
     
     private static final HashMap parserConfigList = new HashMap();
@@ -98,15 +83,10 @@ public final class plasmaParser {
     public static final Properties availableParserList = new Properties();
     
     /**
-     * A list of file extensions that are supported by the html-parser and can
-     * be parsed in realtime.
+     * A list of file extensions and mime types that are supported by the html-parser
      */
-    public static final HashSet supportedRealtimeFileExt = new HashSet();
-    
-    /**
-     * A list of mimeTypes that can be parsed in Realtime (on the fly)
-     */
-    public static final HashSet realtimeParsableMimeTypes = new HashSet();    
+    public static final HashSet supportedHTMLFileExt = new HashSet();
+    public static final HashSet supportedHTMLMimeTypes = new HashSet();    
     
     private static final Properties mimeTypeLookupByFileExt = new Properties();
     static {
@@ -194,34 +174,24 @@ public final class plasmaParser {
     }
     
     /**
-     * This function is used to initialize the realtimeParsableMimeTypes List.
+     * This function is used to initialize the HTMLParsableMimeTypes List.
      * This list contains a list of mimeTypes that can be parsed in realtime by
      * the yacy html-Parser
-     * @param realtimeParsableMimeTypes a list of mimetypes that can be parsed by the 
+     * @param htmlParsableMimeTypes a list of mimetypes that can be parsed by the 
      * yacy html parser
      */
-    public static void initRealtimeParsableMimeTypes(String realtimeParsableMimeTypes) {
+    public static void initHTMLParsableMimeTypes(String htmlParsableMimeTypes) {
         LinkedList mimeTypes = new LinkedList();
-        if ((realtimeParsableMimeTypes == null) || (realtimeParsableMimeTypes.length() == 0)) {
-            // Nothing todo here
-        } else {            
-            String[] realtimeParsableMimeTypeList = realtimeParsableMimeTypes.split(",");        
-            for (int i = 0; i < realtimeParsableMimeTypeList.length; i++) mimeTypes.add(realtimeParsableMimeTypeList[i].toLowerCase().trim());
+        if ((htmlParsableMimeTypes == null) || (htmlParsableMimeTypes.length() == 0)) {
+            return;
         }
-        initRealtimeParsableMimeTypes(mimeTypes);
-    }
-    
-    /**
-     * This function is used to initialize the realtimeParsableMimeTypes List.
-     * This list contains a list of mimeTypes that can be parsed in realtime by
-     * the yacy html-Parser
-     * @param realtimeParsableMimeTypes a list of mimetypes that can be parsed by the 
-     * yacy html parser
-     */    
-    public static void initRealtimeParsableMimeTypes(List mimeTypesList) {
-        synchronized (realtimeParsableMimeTypes) {
-            realtimeParsableMimeTypes.clear();
-            realtimeParsableMimeTypes.addAll(mimeTypesList);
+        String[] realtimeParsableMimeTypeList = htmlParsableMimeTypes.split(",");        
+        for (int i = 0; i < realtimeParsableMimeTypeList.length; i++) {
+            mimeTypes.add(realtimeParsableMimeTypeList[i].toLowerCase().trim());
+        }
+        synchronized (supportedHTMLMimeTypes) {
+            supportedHTMLMimeTypes.clear();
+            supportedHTMLMimeTypes.addAll(mimeTypes);
         }        
     }
     
@@ -277,32 +247,31 @@ public final class plasmaParser {
         }        
     }
     
-    public static void initSupportedRealtimeFileExt(List supportedRealtimeFileExtList) {
-        synchronized (supportedRealtimeFileExt) {
-            supportedRealtimeFileExt.clear();
-            supportedRealtimeFileExt.addAll(supportedRealtimeFileExtList);
+    public static void initSupportedHTMLFileExt(List supportedRealtimeFileExtList) {
+        synchronized (supportedHTMLFileExt) {
+            supportedHTMLFileExt.clear();
+            supportedHTMLFileExt.addAll(supportedRealtimeFileExtList);
         }
     }
         
-    public static boolean realtimeParsableMimeTypesContains(String mimeType) {
-        mimeType = getRealMimeType(mimeType);
-        synchronized (realtimeParsableMimeTypes) {
-            return realtimeParsableMimeTypes.contains(mimeType);
+    public static boolean HTMLParsableMimeTypesContains(String mimeType) {
+        mimeType = normalizeMimeType(mimeType);
+        synchronized (supportedHTMLMimeTypes) {
+            return supportedHTMLMimeTypes.contains(mimeType);
         }
     }
     
-    public static boolean supportedRealTimeContent(yacyURL url, String mimeType) {
-        return realtimeParsableMimeTypesContains(mimeType) && supportedRealtimeFileExtContains(url);
+    public static boolean supportedHTMLContent(yacyURL url, String mimeType) {
+        return HTMLParsableMimeTypesContains(mimeType) && supportedHTMLFileExtContains(url);
     }    
     
-    public static boolean supportedRealtimeFileExtContains(yacyURL url) {
+    public static boolean supportedHTMLFileExtContains(yacyURL url) {
         String fileExt = getFileExt(url);
-        synchronized (supportedRealtimeFileExt) {
-            return supportedRealtimeFileExt.contains(fileExt);
+        synchronized (supportedHTMLFileExt) {
+            return supportedHTMLFileExt.contains(fileExt);
         }   
     }
 
-    
     public static String getFileExt(yacyURL url) {
         // getting the file path
         String name = url.getPath();
@@ -319,13 +288,12 @@ public final class plasmaParser {
         return name.substring(p + 1);        
     }
 
-    
     public static boolean mediaExtContains(String mediaExt) {
         if (mediaExt == null) return false;
         mediaExt = mediaExt.trim().toLowerCase();
         
-        synchronized (supportedRealtimeFileExt) {
-            if (supportedRealtimeFileExt.contains(mediaExt)) return false;
+        synchronized (supportedHTMLFileExt) {
+            if (supportedHTMLFileExt.contains(mediaExt)) return false;
         }        
         
         if (supportedFileExtContains(mediaExt)) return false;
@@ -407,7 +375,7 @@ public final class plasmaParser {
     	return encoding;
     }
     
-    public static String getRealMimeType(String mimeType) {
+    public static String normalizeMimeType(String mimeType) {
         //if (mimeType == null) doMimeTypeAnalysis
         if (mimeType == null) mimeType = "application/octet-stream";
         mimeType = mimeType.trim().toLowerCase();
@@ -616,7 +584,7 @@ public final class plasmaParser {
                 this.theLogger.logFine("Parsing '" + location + "' from stream");            
             
             // getting the mimetype of the document
-            mimeType = getRealMimeType(theMimeType);
+            mimeType = normalizeMimeType(theMimeType);
             
             // getting the file extension of the document
             String fileExt = getFileExt(location);
@@ -646,7 +614,7 @@ public final class plasmaParser {
                 theParser.setContentLength(contentLength);
                 // parse the resource
                 doc = theParser.parse(location, mimeType,documentCharset,sourceStream);
-            } else if (realtimeParsableMimeTypesContains(mimeType)) {
+            } else if (HTMLParsableMimeTypesContains(mimeType)) {
                 doc = parseHtml(location, mimeType, documentCharset, sourceStream);
             } else {
                 String errorMsg = "No parser available to parse mimetype '" + mimeType + "'";
@@ -749,7 +717,7 @@ public final class plasmaParser {
      */
     private Parser getParser(String mimeType) {
 
-        mimeType = getRealMimeType(mimeType);        
+        mimeType = normalizeMimeType(mimeType);        
         try {
             
             // determining the proper parser class name for the mimeType
@@ -782,17 +750,6 @@ public final class plasmaParser {
         
     }
     
-    /*
-    public static String urlNormalform(URL url) {
-        if (url == null) return null;
-        return urlNormalform(url.toString());
-    }
-    
-    public static String urlNormalform(String us) {
-        return htmlFilterContentScraper.urlNormalform(us);
-    }   
-    */
-    
     static Map allReflinks(Set links) {
         // links is either a Set of Strings (with urls) or htmlFilterImageEntries
         // we find all links that are part of a reference inside a url
@@ -909,11 +866,8 @@ public final class plasmaParser {
             // creating a plasma parser
             plasmaParser theParser = new plasmaParser();
             
-            // configuring the realtime parsable mimeTypes
-            plasmaParser.initRealtimeParsableMimeTypes("application/xhtml+xml,text/html,text/plain,text/sgml");
-            
-            // configure all other supported mimeTypes
-            plasmaParser.enableAllParsers(PARSER_MODE_PROXY);
+            // configuring the html parsable mimeTypes
+            plasmaParser.initHTMLParsableMimeTypes("application/xhtml+xml,text/html,text/plain,text/sgml");
 
             // parsing the content
             plasmaParserDocument document = null;
@@ -955,17 +909,6 @@ public final class plasmaParser {
         }
     }
     
-    private static void enableAllParsers(String parserMode) {
-        if (!PARSER_MODE.contains(parserMode)) throw new IllegalArgumentException();
-        
-        plasmaParserConfig config = (plasmaParserConfig) parserConfigList.get(parserMode);
-        if (config == null) {
-            config = new plasmaParserConfig(parserMode);
-            parserConfigList.put(parserMode, config);
-        }
-        config.enableAllParsers();        
-    }
-    
     public static boolean supportedContent(yacyURL url, String mimeType) {
         if (url == null) throw new NullPointerException();
         
@@ -984,6 +927,7 @@ public final class plasmaParser {
         if (!PARSER_MODE.contains(parserMode)) throw new IllegalArgumentException();
         if (url == null) throw new NullPointerException();
         
+        if (parserMode.equals(PARSER_MODE_IMAGE)) return true;
         plasmaParserConfig config = (plasmaParserConfig) parserConfigList.get(parserMode);
         return (config == null)?false:config.supportedContent(url, mimeType);
     }
diff --git a/source/de/anomic/plasma/plasmaParserConfig.java b/source/de/anomic/plasma/plasmaParserConfig.java
index da96bb52b..0b3ffad27 100644
--- a/source/de/anomic/plasma/plasmaParserConfig.java
+++ b/source/de/anomic/plasma/plasmaParserConfig.java
@@ -88,7 +88,7 @@ public class plasmaParserConfig {
     public boolean supportedContent(yacyURL url, String mimeType) {
         // TODO: we need some exceptions here to index URLs like this
         //       http://www.musicabona.com/respighi/12668/cd/index.html.fr
-        mimeType = plasmaParser.getRealMimeType(mimeType);
+        mimeType = plasmaParser.normalizeMimeType(mimeType);
         if (
                 mimeType.equals("text/html") ||
                 mimeType.equals("application/xhtml+xml") ||
@@ -100,10 +100,10 @@ public class plasmaParserConfig {
     }        
     
     public boolean supportedMimeTypesContains(String mimeType) {
-        mimeType = plasmaParser.getRealMimeType(mimeType);
+        mimeType = plasmaParser.normalizeMimeType(mimeType);
         
-        synchronized (plasmaParser.realtimeParsableMimeTypes) {
-            if (plasmaParser.realtimeParsableMimeTypes.contains(mimeType)) return true;
+        synchronized (plasmaParser.supportedHTMLMimeTypes) {
+            if (plasmaParser.supportedHTMLMimeTypes.contains(mimeType)) return true;
         }        
 
         synchronized (this.enabledParserList) { 
@@ -124,8 +124,8 @@ public class plasmaParserConfig {
         if (fileExt == null) return false;        
         fileExt = fileExt.trim().toLowerCase();
 
-        synchronized (plasmaParser.supportedRealtimeFileExt) {
-            if (plasmaParser.supportedRealtimeFileExt.contains(fileExt)) return true;
+        synchronized (plasmaParser.supportedHTMLFileExt) {
+            if (plasmaParser.supportedHTMLFileExt.contains(fileExt)) return true;
         }        
 
         synchronized(this.supportedFileExt) {
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 688737558..0f9aef309 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -563,11 +563,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     // Parser settings
     //////////////////////////////////////////////////////////////////////////////////////////////
     
-    public static final String PARSER_MIMETYPES_REALTIME        = "parseableRealtimeMimeTypes";
+    public static final String PARSER_MIMETYPES_HTML            = "parseableMimeTypes.HTML";
     public static final String PARSER_MIMETYPES_PROXY           = "parseableMimeTypes.PROXY";
     public static final String PARSER_MIMETYPES_CRAWLER         = "parseableMimeTypes.CRAWLER";
     public static final String PARSER_MIMETYPES_ICAP            = "parseableMimeTypes.ICAP";
     public static final String PARSER_MIMETYPES_URLREDIRECTOR   = "parseableMimeTypes.URLREDIRECTOR";
+    public static final String PARSER_MIMETYPES_IMAGE           = "parseableMimeTypes.IMAGE";
     public static final String PARSER_MEDIA_EXT                 = "mediaExt";
     public static final String PARSER_MEDIA_EXT_PARSEABLE       = "parseableExt";
     
@@ -1180,15 +1181,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         // define an extension-blacklist
         log.logConfig("Parser: Initializing Extension Mappings for Media/Parser");
         plasmaParser.initMediaExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT,"")));
-        plasmaParser.initSupportedRealtimeFileExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT_PARSEABLE,"")));
+        plasmaParser.initSupportedHTMLFileExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT_PARSEABLE,"")));
         
         // define a realtime parsable mimetype list
         log.logConfig("Parser: Initializing Mime Types");
-        plasmaParser.initRealtimeParsableMimeTypes(getConfig(PARSER_MIMETYPES_REALTIME,"application/xhtml+xml,text/html,text/plain"));
-        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_PROXY,getConfig(PARSER_MIMETYPES_PROXY,null));
-        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER,getConfig(PARSER_MIMETYPES_CRAWLER,null));
-        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_ICAP,getConfig(PARSER_MIMETYPES_ICAP,null));
-        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_URLREDIRECTOR,getConfig(PARSER_MIMETYPES_URLREDIRECTOR,null));
+        plasmaParser.initHTMLParsableMimeTypes(getConfig(PARSER_MIMETYPES_HTML, "application/xhtml+xml,text/html,text/plain"));
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_PROXY, getConfig(PARSER_MIMETYPES_PROXY, null));
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, getConfig(PARSER_MIMETYPES_CRAWLER, null));
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_ICAP, getConfig(PARSER_MIMETYPES_ICAP, null));
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_URLREDIRECTOR, getConfig(PARSER_MIMETYPES_URLREDIRECTOR, null));
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_IMAGE, getConfig(PARSER_MIMETYPES_IMAGE, null));
         
         // start a loader
         log.logConfig("Starting Crawl Loader");
diff --git a/yacy.init b/yacy.init
index 5cb9c397b..55264ca16 100644
--- a/yacy.init
+++ b/yacy.init
@@ -224,9 +224,9 @@ proxyCacheMigration = true
 
 # the following mime-types are the whitelist for indexing
 #
-# parseableRealtimeMimeTypes: specifies mime-types that can be indexed on the fly
+# parseableMimeTypes.HTML: specifies mime-types that can be indexed with the built-in HTML parser
 # parseableMime: specifies mime-types that can be indexed but not on the fly
-parseableRealtimeMimeTypes=application/xhtml+xml,text/html,text/plain,text/sgml
+parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
 parseableMimeTypes=
 parseableMimeTypes__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
 parseableMimeTypes.CRAWLER=
@@ -237,6 +237,7 @@ parseableMimeTypes.ICAP=
 parseableMimeTypes.ICAP__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
 parseableMimeTypes.URLREDIRECTOR=
 parseableMimeTypes.URLREDIRECTOR__pro=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml
+parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp
 
 # media extension string
 # a comma-separated list of extensions that denote media file formats