*) Redesign of parser configuration

- restructuring of mimeTypes based on the parsers - displaying parser usage count - displaying human-readable parser names - displaying parser version information *) httpdFileHandler.java - adding possibility to support "streaming" servlets which are special servlets that can communicate with the client via the connection streams autonomously - the name of these new servlet types must end with the file extension .stream - this feature will be needed by the yacy ScreenSaver class to fetch statistical data from the peer without the need to reconnect to the server all the time *) Adding human-readable names and version information for all supported parsers *) plasmaParser.java - adding new structure to store parser statistical data *) Adding openDocument parser - can be used to parse odt files *) jmimemagic - adding rules to detect openDocument formats properly *) serverLog.java - adding functions that can be used to query if a given logging level is enabled or not. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1140 6c8d7289-2bf4-0310-a012-ef5d649a1542
20 years ago · bdf30117c1
parent 3037c1e73b
commit bdf30117c1
24 changed files with 735 additions and 166 deletions
--- a/htroot/Settings_Parser.inc
+++ b/htroot/Settings_Parser.inc
@ -7,23 +7,30 @@ For a detailed description of the various MIME-types take a look at <a href="htt
 <tr class="TableHeader" valign="bottom">
 <td class="small" >Activate</td>
 <td class="small" >Mime-Type</td>
-<td class="small" >Parser&nbsp;Class&nbsp;Name</td>
+<td class="small" >Parser&nbsp;Usage</td>
 <td class="small" ></td>
 </tr>
 #{parser}#
 <tr class="TableCellDark">
-	<td class="small" align="center"><input type="checkbox" name="#[mime]#" align="top" #(status)#::checked#(/status)#></td>
-	<td class="small" >#[mime]#</td>
-	<td class="small" title="Full qualified name: #[name]#">#[shortname]#</td>
+	<td colspan="2">#[name]# V#[version]#</td>
+	<td>#[usage]#</td>
+	<td>&nbsp;</td>
+</tr>
+#{mime}#
+<tr class="TableCellLight">
+	<td class="small" align="center"><input type="checkbox" name="#[mimetype]#" align="top" #(status)#::checked#(/status)#></td>
+	<td class="small">#[mimetype]#</td>
+	<td class="small">&nbsp;</td>
 	<td class="small" width="100%"></td>
-	</tr>
+</tr>
+#{/mime}#
 #{/parser}#
-  <tr class="TableCellLight">
+  <tr class="TableCellDark">
    <td class="small" align="center"><input type="checkbox" name="allParserEnabled" align="top" #(allParserEnabled)#::checked#(/allParserEnabled)#>
    <td colspan="2" class="small" >Enable all parsers</td>
    <td class="small">&nbsp;</td>
  </tr>
-  <tr class="TableCellLight">
+  <tr class="TableCellDark">
    <td colspan="4" class="small" ><input type="submit" name="parserSettings" value="submit">&nbsp;Changes take effect immediately</td>
  </tr>
 </table>
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@ -45,13 +45,16 @@

 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;

 import de.anomic.http.httpHeader;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.parser.ParserInfo;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 import de.anomic.yacy.yacyCore;
@ -249,29 +252,42 @@ public final class Settings_p {
         * Parser Configuration
         */
        plasmaSwitchboard sb = (plasmaSwitchboard)env;
-        Hashtable enabledParsers = sb.parser.getEnabledParserList();
-        Hashtable availableParsers = sb.parser.getAvailableParserList();
+        HashSet enabledParsers = sb.parser.getEnabledParserList();
+        HashSet parserInfos = new HashSet(sb.parser.getAvailableParserList().values());
        
-        // fetching a list of all available mimetypes
-        List availableParserKeys = Arrays.asList(availableParsers.keySet().toArray(new String[availableParsers.size()]));
-        
-        // sort it
-        Collections.sort(availableParserKeys);
+//        // fetching a list of all available mimetypes
+//        List availableParserKeys = Arrays.asList(availableParsers.entrySet().toArray(new ParserInfo[availableParsers.size()]));
+//        
+//        // sort it
+//        Collections.sort(availableParserKeys);
        
        // loop through the mimeTypes and add it to the properties
        boolean allParsersEnabled = true;
        int parserIdx = 0;
-        Iterator availableParserIter = availableParserKeys.iterator();
+        
+        Iterator availableParserIter = parserInfos.iterator();
        while (availableParserIter.hasNext()) {
-            String mimeType = (String) availableParserIter.next();
-            String parserName = (String) availableParsers.get(mimeType);
-            boolean parserIsEnabled = enabledParsers.containsKey(mimeType);
+            ParserInfo parserInfo = (ParserInfo) availableParserIter.next();
+            prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
+            prop.put("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
+            prop.put("parser_" + parserIdx + "_usage", Integer.toString(parserInfo.usageCount));
            
-            prop.put("parser_" + parserIdx + "_mime", mimeType);
-            prop.put("parser_" + parserIdx + "_name", parserName);
-            prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
-            prop.put("parser_" + parserIdx + "_status", parserIsEnabled ? 1:0);
-            allParsersEnabled &= parserIsEnabled;
+            int mimeIdx = 0;
+            Enumeration mimeTypeIter = parserInfo.supportedMimeTypes.keys();
+            while (mimeTypeIter.hasMoreElements()) {
+                String mimeType = (String)mimeTypeIter.nextElement();
+                
+                boolean parserIsEnabled = enabledParsers.contains(mimeType);
+                
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_mimetype", mimeType);
+                //prop.put("parser_" + parserIdx + "_name", parserName);
+                //prop.put("parser_" + parserIdx + "_shortname", parserName.substring(parserName.lastIndexOf(".")+1));
+                prop.put("parser_" + parserIdx + "_mime_" + mimeIdx + "_status", enabledParsers.contains(mimeType) ? 1:0);
+                allParsersEnabled &= parserIsEnabled;
+                
+                mimeIdx++;
+            }
+            prop.put("parser_" + parserIdx + "_mime", mimeIdx);
            
            parserIdx++;
        }
--- a/httpd.mime
+++ b/httpd.mime
@ -37,6 +37,7 @@ mov     = video/quicktime
 mpe     = video/mpeg
 mpeg    = video/mpeg
 mpg     = video/mpeg
+odt		= application/vnd.oasis.opendocument.text
 ogg     = audio/ogg-vorbis
 pac     = application/x-ns-proxy-autoconfig
 pdf     = application/pdf
@ -70,6 +71,7 @@ tif     = image/tiff
 tiff    = image/tiff
 torrent = application/x-bittorrent
 txt     = text/plain
+vcf     = text/x-vcard
 wav     = audio/x-wav
 xhtml   = application/xhtml+xml
 xla     = application/msexcel
--- a/libx/jmimemagic-0.0.4a.jar
+++ b/libx/jmimemagic-0.0.4a.jar
--- a/libx/odf_utils_05_11_10.jar
+++ b/libx/odf_utils_05_11_10.jar
--- a/source/de/anomic/http/httpdFileHandler.java
+++ b/source/de/anomic/http/httpdFileHandler.java
@ -346,7 +346,7 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
        int argc;
        if (argsString == null) {
            // no args here, maybe a POST with multipart extension
-            int length;
+            int length = 0;
            //System.out.println("HEADER: " + requestHeader.toString()); // DEBUG
            if (method.equals(httpHeader.METHOD_POST)) {

@ -356,10 +356,11 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
                } else if (requestHeader.gzip()) {
                    length = -1;
                    gzipBody = new GZIPInputStream(body);
-                } else {
-                    httpd.sendRespondError(conProp,out,4,403,null,"bad post values",null); 
-                    return;
                }
+//                } else {
+//                    httpd.sendRespondError(conProp,out,4,403,null,"bad post values",null); 
+//                    return;
+//                }
                
                // if its a POST, it can be either multipart or as args in the body
                if ((requestHeader.containsKey(httpHeader.CONTENT_TYPE)) &&
@ -438,7 +439,7 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
                }
            }else{
                    //you cannot share a .png/.gif file with a name like a class in htroot.
-                    if ( !(targetFile.exists()) && !((path.endsWith("png")||path.endsWith("gif"))&&targetClass!=null ) ){
+                    if ( !(targetFile.exists()) && !((path.endsWith("png")||path.endsWith("gif")||path.endsWith(".stream"))&&targetClass!=null ) ){
                        targetFile = new File(htDocsPath, path);
                        targetClass = rewriteClassFile(new File(htDocsPath, path));
                    }
@ -486,6 +487,20 @@ public final class httpdFileHandler extends httpdAbstractHandler implements http
                    Thread.currentThread().sleep(200); // see below
                    serverFileUtils.write(result, out);
                }
+            } else if ((targetClass != null) && (path.endsWith(".stream"))) {
+                // call rewrite-class
+                requestHeader.put("CLIENTIP", conProp.getProperty("CLIENTIP"));
+                requestHeader.put("PATH", path);
+                requestHeader.put("INPUTSTREAM", body);
+                requestHeader.put("OUTPUTSTREAM", out);
+             
+                httpd.sendRespondHeader(this.connectionProperties, out, httpVersion, 200, null);                
+                
+                // in case that there are no args given, args = null or empty hashmap
+                serverObjects tp = (serverObjects) rewriteMethod(targetClass).invoke(null, new Object[] {requestHeader, args, switchboard});
+             
+                this.forceConnectionClose();
+                return;                
            } else if ((targetFile.exists()) && (targetFile.canRead())) {
                // we have found a file that can be written to the client
                // if this file uses templates, then we use the template
--- a/source/de/anomic/plasma/parser/AbstractParser.java
+++ b/source/de/anomic/plasma/parser/AbstractParser.java
@ -73,6 +73,16 @@ public abstract class AbstractParser implements Parser{
     * purposes.
     */
    protected serverLog theLogger = null;
+
+    /**
+     * Version number of the parser
+     */    
+    protected String parserVersionNr = "0.1";
+    
+    /**
+     * Parser name
+     */
+    protected String parserName = this.getClass().getSimpleName();
    
    /**
     * The Constructor of this class.
@ -165,4 +175,18 @@ public abstract class AbstractParser implements Parser{
        this.theLogger = log;
    }
    
+    /**
+     * Provides the version number of this parser.
+     * @return the current value of the <code>parserVersionNr</code> field
+     */
+    public String getVersion() {
+        final String versionNr = this.parserVersionNr;
+        return versionNr;
+    }
+    
+    /**
+     * Returns the name of the parser
+     * @return the current value of the <code>parserName</code> field
+     */
+    public String getName() {
+        return this.parserName;
+    }
 }
--- a/source/de/anomic/plasma/parser/Parser.java
+++ b/source/de/anomic/plasma/parser/Parser.java
@ -122,4 +122,17 @@ public interface Parser {
     */
    public void setLogger(serverLog log);
    
+    /**
+     * Returns the version number of the current parser
+     * @return parser version number
+     */
+    public String getVersion();
+    
+    /**
+     * Returns the name of the parser
+     * @return parser name
+     */
+    public String getName();
 }
+
+
--- a/source/de/anomic/plasma/parser/ParserInfo.java
+++ b/source/de/anomic/plasma/parser/ParserInfo.java
@ -0,0 +1,34 @@
+package de.anomic.plasma.parser;
+
+import java.util.Hashtable;
+
+/**
+ * Plain data holder that bundles the descriptive information and the
+ * usage statistic of a single parser.
+ */
+public class ParserInfo {
+    // general parser info
+    public Class parserClass;
+    public String parserClassName;
+    
+    /** Display name of the parser, used by {@link #toString()} */
+    public String parserName;
+    /** Version string of the parser; {@link #toString()} prints "0.0" if this is <code>null</code> */
+    public String parserVersionNr;
+    
+    // parser properties
+    public String[] libxDependencies;
+    /** Supported mime types; presumably keyed by the mime type string - TODO confirm against the parsers */
+    public Hashtable supportedMimeTypes;
+    
+    /**
+     * Usage statistic. Written via {@link #incUsageCounter()}.
+     * Declared volatile because the field is public and may be read
+     * directly, without holding the lock used by {@link #incUsageCounter()};
+     * volatile guarantees such readers see the latest value.
+     */
+    public volatile int usageCount = 0;
+    
+    /**
+     * Builds a one-line description of the form
+     * <code>name Vversion | className | mimeTypes</code>.
+     * @return textual description of this parser info
+     */
+    public String toString() {
+        StringBuffer toStr = new StringBuffer();
+        
+        toStr.append(this.parserName).append(" V")
+             .append((this.parserVersionNr==null)?"0.0":this.parserVersionNr).append(" | ")
+             .append(this.parserClassName).append(" | ")
+             .append(this.supportedMimeTypes);
+        
+        return toStr.toString();
+    }
+    
+    /**
+     * Increments the usage counter by one. Synchronized so that concurrent
+     * increments are not lost.
+     */
+    public synchronized void incUsageCounter() {
+        this.usageCount++;
+    }
+}
--- a/source/de/anomic/plasma/parser/bzip/bzipParser.java
+++ b/source/de/anomic/plasma/parser/bzip/bzipParser.java
@ -77,9 +77,10 @@ public class bzipParser extends AbstractParser implements Parser {
    private static final String[] LIBX_DEPENDENCIES = new String[] {
        "bzip2.jar"
    };
-
+    
    public bzipParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "Bzip 2 UNIX Compressed File Parser";
    }
    
    public Hashtable getSupportedMimeTypes() {
@ -105,7 +106,6 @@ public class bzipParser extends AbstractParser implements Parser {
            
            int read = 0;
            byte[] data = new byte[1024];
-                                                
            CBZip2InputStream zippedContent = new CBZip2InputStream(source);        
            
            tempFile = File.createTempFile("bunzip","tmp");
--- a/source/de/anomic/plasma/parser/doc/docParser.java
+++ b/source/de/anomic/plasma/parser/doc/docParser.java
@ -75,6 +75,7 @@ implements Parser {
    
 	public docParser() {
 		super(LIBX_DEPENDENCIES);
+        parserName = "Word Document Parser";
 	}

 	public plasmaParserDocument parse(URL location, String mimeType,
--- a/source/de/anomic/plasma/parser/gzip/gzipParser.java
+++ b/source/de/anomic/plasma/parser/gzip/gzipParser.java
@ -76,6 +76,7 @@ public class gzipParser extends AbstractParser implements Parser {
    
    public gzipParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "GNU Zip Compressed Archive Parser";
    }
    
    public Hashtable getSupportedMimeTypes() {
--- a/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java
+++ b/source/de/anomic/plasma/parser/mimeType/mimeTypeParser.java
@ -47,6 +47,7 @@ import java.io.File;
 import java.io.InputStream;
 import java.net.URL;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Hashtable;

 import org.apache.log4j.Level;
@ -74,7 +75,10 @@ implements Parser {
    static { 
        SUPPORTED_MIME_TYPES.put("text/xml","xml");
        SUPPORTED_MIME_TYPES.put("application/xml","xml"); 
-        SUPPORTED_MIME_TYPES.put("application/octet-stream","");
+        SUPPORTED_MIME_TYPES.put("application/x-xml","xml");        
+        SUPPORTED_MIME_TYPES.put("application/octet-stream","");        
+        SUPPORTED_MIME_TYPES.put("application/x-compress","");
+        SUPPORTED_MIME_TYPES.put("application/x-compressed","");
    } 
    
    /**
@ -88,14 +92,56 @@ implements Parser {
        "xerces.jar"
    };
    
+    /**
+     * Helping structure used to detect loops in the mimeType detection
+     * process
+     */
+    private static Hashtable threadLoopDetection = new Hashtable();
+    
    public mimeTypeParser() {
        super(LIBX_DEPENDENCIES);
+        parserName = "MimeType Parser"; 
+    }
+    
+    public String getMimeType (File sourceFile) {
+        String mimeType = null;
+        
+        try {    
+            Magic theMagic = new Magic();           
+            MagicMatch match = theMagic.getMagicMatch(sourceFile);        
+            
+            // if a match was found we can return the new mimeType
+            if (match!=null) {
+                Collection subMatches = match.getSubMatches();
+                if ((subMatches != null) && (!subMatches.isEmpty())) {
+                    mimeType = ((MagicMatch) subMatches.iterator().next()).getMimeType();
+                } else {
+                    mimeType = match.getMimeType();
+                }
+                return mimeType;
+            }
+        } catch (Exception e) {
+            
+        }
+        return null;        
    }
    
    public plasmaParserDocument parse(URL location, String mimeType, File sourceFile) throws ParserException {
        
+        String orgMimeType = mimeType;
+        
        // determining the mime type of the file ...
        try {       
+            // adding current thread to loop detection list
+            Integer loopDepth = null;
+            if (threadLoopDetection.containsKey(Thread.currentThread())) {
+                loopDepth = (Integer) threadLoopDetection.get(Thread.currentThread());                
+            } else {
+                loopDepth = new Integer(0);
+            }
+            if (loopDepth.intValue() > 5) return null;
+            threadLoopDetection.put(Thread.currentThread(),new Integer(loopDepth.intValue()+1));
+            
            // deactivating the logging for jMimeMagic
            Logger theLogger = Logger.getLogger("net.sf.jmimemagic");
            theLogger.setLevel(Level.OFF);
@ -115,6 +161,7 @@ implements Parser {
                
                // to avoid loops we have to test if the mimetype has changed ...
                if (this.getSupportedMimeTypes().containsKey(mimeType)) return null;
+                if (orgMimeType.equals(mimeType)) return null;
                
                plasmaParser theParser = new plasmaParser();
                return theParser.parseSource(location,mimeType,sourceFile);
@ -123,6 +170,13 @@ implements Parser {
            
        } catch (Exception e) {
            return null;
+        } finally {
+            Integer loopDepth = (Integer) threadLoopDetection.get(Thread.currentThread());                
+            if (loopDepth.intValue() <= 1) {
+                threadLoopDetection.remove(Thread.currentThread());
+            } else {
+                threadLoopDetection.put(Thread.currentThread(), new Integer(loopDepth.intValue()-1));
+            }
        }
    }
    
--- a/source/de/anomic/plasma/parser/odt/build.xml
+++ b/source/de/anomic/plasma/parser/odt/build.xml
@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<project name="YACY - odtParser" default="dist">
+    <description>
+            A class to parse OpenDocument text (odt) files
+    </description>
+
+    <property name="parserShortName" value="odt"/>
+	<property name="parserVersion" value="0.1"/>
+
+    <property name="parserLongName" value="yacyContentParser_${parserShortName}"/>    	
+   	<property name="parserArchive" location="${release}/${parserLongName}_${parserVersion}.tgz"/>	    	
+    	
+    <target name="compile">
+  	  <javac srcdir="${src}/de/anomic/plasma/parser/${parserShortName}" destdir="${build}" source="${javacSource}" target="${javacTarget}" debug="true" debuglevel="lines,vars,source">
+  	  	<classpath>
+  	  	  <pathelement location="${build}" />
+   	  	  <pathelement location="${libx}/odf_utils_05_11_10.jar" />
+  	  	</classpath>
+  	  </javac>    	
+    </target>
+	
+
+    <target name="zip" depends="compile">
+  	  <tar destfile="${parserArchive}" compression="gzip">
+  	  	<tarfileset dir="${libx}" 
+  	  				includes="odf_utils_05_11_10.*" 
+  	  				prefix="${releaseDir}/libx/"
+			  	  	dirmode="755" mode="644"/>    	  	
+  	  	<tarfileset dir="${src}/de/anomic/plasma/parser/${parserShortName}" 
+  	  				prefix="${releaseDir}/source/de/anomic/plasma/parser/${parserShortName}"
+			  	  	dirmode="755" mode="644"/>
+  	  	<tarfileset dir="${build}/de/anomic/plasma/parser/${parserShortName}" 
+  	  				prefix="${releaseDir}/classes/de/anomic/plasma/parser/${parserShortName}"
+			  	  	dirmode="755" mode="644"/>	  	
+  	  </tar>    	
+    </target>	
+
+    <target name="copy" depends="compile">
+        <copy todir="${release}/libx/">
+             <fileset dir="${libx}" includes="odf_utils_05_11_10.*"/> 
+        </copy> 
+        <copy todir="${release}/source/de/anomic/plasma/parser/${parserShortName}">
+             <fileset dir="${src}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/> 
+        </copy>        
+        <copy todir="${release}/classes/de/anomic/plasma/parser/${parserShortName}">
+             <fileset dir="${build}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/> 
+        </copy>         
+    </target>  
+    
+
+    <target name="dist" depends="compile,zip" description="Compile and zip the parser"/>        
+	
+	
+</project>
+
--- a/source/de/anomic/plasma/parser/odt/odtParser.java
+++ b/source/de/anomic/plasma/parser/odt/odtParser.java
@ -0,0 +1,214 @@
+//zipParser.java 
+//------------------------
+//part of YaCy
+//(C) by Michael Peter Christen; mc@anomic.de
+//first published on http://www.anomic.de
+//Frankfurt, Germany, 2005
+//
+//this file is contributed by Martin Thelian
+//last major change: 16.05.2005
+//
+//This program is free software; you can redistribute it and/or modify
+//it under the terms of the GNU General Public License as published by
+//the Free Software Foundation; either version 2 of the License, or
+//(at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+//along with this program; if not, write to the Free Software
+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+//Using this software in any meaning (reading, learning, copying, compiling,
+//running) means that you agree that the Author(s) is (are) not responsible
+//for cost, loss of data or any harm that may be caused directly or indirectly
+//by usage of this softare or this documentation. The usage of this software
+//is on your own risk. The installation and usage (starting/running) of this
+//software may allow other people or application to access your computer and
+//any attached devices and is highly dependent on the configuration of the
+//software which must be done by the user of the software; the author(s) is
+//(are) also not responsible for proper configuration and usage of the
+//software, even if provoked by documentation provided together with
+//the software.
+//
+//Any changes to this file according to the GPL as documented in the file
+//gpl.txt aside this file in the shipment you received can be done to the
+//lines that follows this copyright notice here, but changes must not be
+//done inside the copyright notive above. A re-distribution must contain
+//the intact and unchanged copyright notice.
+//Contributions and changes to the program code must be marked as such.
+
+package de.anomic.plasma.parser.odt;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import com.catcode.odf.ODFMetaFileAnalyzer;
+import com.catcode.odf.OpenDocumentMetadata;
+import com.catcode.odf.OpenDocumentTextInputStream;
+
+import de.anomic.http.httpc;
+import de.anomic.plasma.plasmaParserDocument;
+import de.anomic.plasma.parser.AbstractParser;
+import de.anomic.plasma.parser.Parser;
+import de.anomic.plasma.parser.ParserException;
+import de.anomic.server.serverFileUtils;
+import de.anomic.server.logging.serverLog;
+
+public class odtParser extends AbstractParser implements Parser {
+
+    /**
+     * a list of mime types that are supported by this parser class
+     * @see #getSupportedMimeTypes()
+     */
+    public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();    
+    static { 
+        SUPPORTED_MIME_TYPES.put("application/vnd.oasis.opendocument.text","odt");
+        SUPPORTED_MIME_TYPES.put("application/x-vnd.oasis.opendocument.text","odt");
+    }     
+
+    /**
+     * a list of library names that are needed by this parser
+     * @see Parser#getLibxDependences()
+     */
+    private static final String[] LIBX_DEPENDENCIES = new String[] {"odf_utils_05_11_10.jar"};        
+    
+    public odtParser() {        
+        super(LIBX_DEPENDENCIES);
+        parserName = "OASIS OpenDocument V2 Text Document Parser"; 
+    }
+    
+    public Hashtable getSupportedMimeTypes() {
+        return SUPPORTED_MIME_TYPES;
+    }
+    
+    public plasmaParserDocument parse(URL location, String mimeType, File dest) throws ParserException {
+        
+        try {          
+            byte[] docContent     = null;
+            String docDescription = null;
+            String docKeywords    = null;
+            String docShortTitle       = null;
+            String docLongTitle     = null;
+            
+            // opening the file as zip file
+            ZipFile zipFile= new ZipFile(dest);
+            Enumeration zipEnum = zipFile.entries();
+            
+            // looping through all containing files
+            while (zipEnum.hasMoreElements()) {
+                ZipEntry zipEntry= (ZipEntry) zipEnum.nextElement();
+                String entryName = zipEntry.getName();
+                
+                // content.xml contains the document content in xml format
+                if (entryName.equals("content.xml")) {
+                    InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
+                    OpenDocumentTextInputStream odStream = new OpenDocumentTextInputStream(zipFileEntryStream);
+                    docContent = serverFileUtils.read(odStream); 
+                
+                // meta.xml contains metadata about the document
+                } else if (entryName.equals("meta.xml")) {
+                    InputStream zipFileEntryStream = zipFile.getInputStream(zipEntry);
+                    ODFMetaFileAnalyzer metaAnalyzer = new ODFMetaFileAnalyzer();
+                    OpenDocumentMetadata metaData = metaAnalyzer.analyzeMetaData(zipFileEntryStream);
+                    docDescription = metaData.getDescription();
+                    docKeywords    = metaData.getKeyword();
+                    docShortTitle  = metaData.getTitle();
+                    docLongTitle   = metaData.getSubject();
+                    
+                    // if there is no title availabe we generate one
+                    if (docLongTitle == null) {
+                        if (docShortTitle != null) {
+                            docLongTitle = docShortTitle;
+                        } else if (docContent.length <= 80) {
+                            docLongTitle = new String(docContent);
+                        } else {
+                            byte[] title = new byte[80];
+                            System.arraycopy(docContent, 0, title, 0, 80);
+                            docLongTitle = new String(title);
+                        }
+                        docLongTitle.
+                        replaceAll("\r\n"," ").
+                        replaceAll("\n"," ").
+                        replaceAll("\r"," ").
+                        replaceAll("\t"," ");
+                    }
+                }
+            }
+         
+            return new plasmaParserDocument(
+                    location,
+                    mimeType,
+                    docKeywords,
+                    docShortTitle, 
+                    docLongTitle,
+                    null,
+                    docDescription,
+                    docContent,
+                    null,
+                    null);
+        } catch (Exception e) {            
+            e.printStackTrace();
+            throw new ParserException("Unable to parse the odt content. " + e.getMessage());
+        } catch (Error e) {
+            throw new ParserException("Unable to parse the odt content. " + e.getMessage());
+        }
+    }
+    
+    public plasmaParserDocument parse(URL location, String mimeType, InputStream source) throws ParserException {
+        File dest = null;
+        try {
+            // creating a tempfile
+            dest = File.createTempFile("OpenDocument", ".odt");
+            dest.deleteOnExit();
+            
+            // copying the stream into a file
+            serverFileUtils.copy(source, dest);
+            
+            // parsing the content
+            return parse(location, mimeType, dest);
+        } catch (Exception e) {
+            throw new ParserException("Unable to parse the odt document. " + e.getMessage());
+        } finally {
+            if (dest != null) try { dest.delete(); } catch (Exception e){}
+        }
+    }
+    
+    public void reset() {
+		// Nothing todo here at the moment
+    	
+    }
+    
+    public static void main(String[] args) {
+        try {
+            if (args.length != 1) return;
+            
+            // getting the content URL
+            URL contentUrl = new URL(args[0]);
+            
+            // creating a new parser
+            odtParser testParser = new odtParser();
+            
+            // setting the parser logger
+            testParser.setLogger(new serverLog("PARSER.ODT"));
+            
+            // downloading the document content
+            byte[] content = httpc.singleGET(contentUrl, 10000, null, null, null);
+            ByteArrayInputStream input = new ByteArrayInputStream(content);
+            
+            // parsing the document
+            testParser.parse(contentUrl, "application/vnd.oasis.opendocument.text", input);            
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
--- a/source/de/anomic/plasma/parser/pdf/pdfParser.java
+++ b/source/de/anomic/plasma/parser/pdf/pdfParser.java
@ -74,10 +74,11 @@ public class pdfParser extends AbstractParser implements Parser {
     */
    private static final String[] LIBX_DEPENDENCIES = new String[] {
        "PDFBox-0.7.2.jar"
-    };    
+    };        
    
    public pdfParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "Acrobat Portable Document Parser"; 
    }
    
    public Hashtable getSupportedMimeTypes() {
--- a/source/de/anomic/plasma/parser/rss/rssParser.java
+++ b/source/de/anomic/plasma/parser/rss/rssParser.java
@ -91,10 +91,11 @@ public class rssParser extends AbstractParser implements Parser {
        "informa-0.6.0.jar",
        "commons-logging.jar",
        "jdom.jar"
-    };    
+    };       
    
 	public rssParser() {
 		super(LIBX_DEPENDENCIES);
+        parserName = "Rich Site Summary/Atom Feed Parser"; 
 	}

 	public plasmaParserDocument parse(URL location, String mimeType,
--- a/source/de/anomic/plasma/parser/rtf/rtfParser.java
+++ b/source/de/anomic/plasma/parser/rtf/rtfParser.java
@ -73,10 +73,11 @@ implements Parser {
     * a list of library names that are needed by this parser
     * @see Parser#getLibxDependences()
     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};
+    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
    
 	public rtfParser() {
 		super(LIBX_DEPENDENCIES);
+        parserName = "Rich Text Format Parser";  
 	}

 	public plasmaParserDocument parse(URL location, String mimeType,
--- a/source/de/anomic/plasma/parser/tar/tarParser.java
+++ b/source/de/anomic/plasma/parser/tar/tarParser.java
@ -44,13 +44,16 @@
 package de.anomic.plasma.parser.tar;

 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.LinkedList;
 import java.util.Map;
+import java.util.zip.GZIPInputStream;

 import com.ice.tar.TarEntry;
 import com.ice.tar.TarInputStream;
@ -61,6 +64,7 @@ import de.anomic.plasma.parser.AbstractParser;
 import de.anomic.plasma.parser.Parser;
 import de.anomic.plasma.parser.ParserException;
 import de.anomic.server.serverByteBuffer;
+import de.anomic.server.serverFileUtils;

 public class tarParser extends AbstractParser implements Parser {

@ -71,6 +75,7 @@ public class tarParser extends AbstractParser implements Parser {
    public static final Hashtable SUPPORTED_MIME_TYPES = new Hashtable();    
    static { 
        SUPPORTED_MIME_TYPES.put("application/x-tar","tar");
+        SUPPORTED_MIME_TYPES.put("application/tar","tar");
    }     

    /**
@ -83,6 +88,7 @@ public class tarParser extends AbstractParser implements Parser {
    
    public tarParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "Tape Archive File Parser"; 
    }
    
    public Hashtable getSupportedMimeTypes() {
@ -92,6 +98,18 @@ public class tarParser extends AbstractParser implements Parser {
    public plasmaParserDocument parse(URL location, String mimeType, InputStream source) throws ParserException {
        
        try {           
+            // creating a new parser class to parse the unzipped content
+            plasmaParser theParser = new plasmaParser();       
+            
+            /*
+             * If the mimeType was not reported correctly by the webserver we
+             * have to decompress it first
+             */
+            String ext = plasmaParser.getFileExt(location).toLowerCase();
+            if (ext.equals("gz") || ext.equals("tgz")) {
+                source = new GZIPInputStream(source);
+            }
+            
            StringBuffer docKeywords = new StringBuffer();
            StringBuffer docShortTitle = new StringBuffer();  
            StringBuffer docLongTitle = new StringBuffer();   
@ -100,11 +118,7 @@ public class tarParser extends AbstractParser implements Parser {
            serverByteBuffer docText = new serverByteBuffer();
            Map docAnchors = new HashMap();
            Map docImages = new HashMap(); 
-            
-            
-            // creating a new parser class to parse the unzipped content
-            plasmaParser theParser = new plasmaParser();            
-            
+                        
            // looping through the contained files
            TarEntry entry;
            TarInputStream tin = new TarInputStream(source);                      
@ -113,22 +127,34 @@ public class tarParser extends AbstractParser implements Parser {
                if (entry.isDirectory()) continue;
                
                // Get the entry name
-                String entryName = entry.getName();                
-                int idx = entryName.lastIndexOf(".");
-                String entryExt = (idx > -1) ? entryName.substring(idx+1) : null;
+                int idx = -1;
+                String entryName = entry.getName();
+                idx = entryName.lastIndexOf("/");
+                if (idx != -1) entryName = entryName.substring(idx+1);
+                idx = entryName.lastIndexOf(".");
+                String entryExt = (idx > -1) ? entryName.substring(idx+1) : "";
                
                // trying to determine the mimeType per file extension   
                String entryMime = plasmaParser.getMimeTypeByFileExt(entryExt);
                
                // getting the entry content
-                ByteArrayOutputStream bos = new ByteArrayOutputStream();
-                byte[] buf = new byte[(int) entry.getSize()];
-                int bytesRead = tin.read(buf);
-                bos.write(buf);
-                byte[] ut = bos.toByteArray();           
-                
-                // parsing the content
-                plasmaParserDocument theDoc = theParser.parseSource(location,entryMime,ut);
+                plasmaParserDocument theDoc = null;
+                File tempFile = null;
+                try {
+
+
+                    byte[] buf = new byte[(int) entry.getSize()];
+                    int bytesRead = tin.read(buf);
+
+                    tempFile = File.createTempFile("tarParser_" + ((idx>-1)?entryName.substring(0,idx):entryName), (entryExt.length()>0)?"."+entryExt:entryExt);
+                    serverFileUtils.write(buf, tempFile);           
+                    
+                    // parsing the content
+                    
+                    theDoc = theParser.parseSource(tempFile.toURL(),entryMime,tempFile);
+                } finally {
+                    if (tempFile != null) try {tempFile.delete(); } catch(Exception ex){}
+                }
                if (theDoc == null) continue;
                
                // merging all documents together
--- a/source/de/anomic/plasma/parser/vcf/vcfParser.java
+++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java
@ -86,10 +86,11 @@ public class vcfParser extends AbstractParser implements Parser {
     * a list of library names that are needed by this parser
     * @see Parser#getLibxDependences()
     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {"commons-codec-1.3.jar"};    
+    private static final String[] LIBX_DEPENDENCIES = new String[] {"commons-codec-1.3.jar"};        
    
    public vcfParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "vCard Parser"; 
    }
    
    public Hashtable getSupportedMimeTypes() {
--- a/source/de/anomic/plasma/parser/zip/zipParser.java
+++ b/source/de/anomic/plasma/parser/zip/zipParser.java
@ -79,10 +79,11 @@ public class zipParser extends AbstractParser implements Parser {
     * a list of library names that are needed by this parser
     * @see Parser#getLibxDependences()
     */
-    private static final String[] LIBX_DEPENDENCIES = new String[] {};    
+    private static final String[] LIBX_DEPENDENCIES = new String[] {};        
    
    public zipParser() {        
        super(LIBX_DEPENDENCIES);
+        parserName = "Compressed Archive File Parser"; 
    }
    
    public Hashtable getSupportedMimeTypes() {
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@ -46,7 +46,6 @@
 package de.anomic.plasma;

 import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileFilter;
 import java.io.FileInputStream;
@ -70,8 +69,10 @@ import java.util.Set;

 import de.anomic.htmlFilter.htmlFilterContentScraper;
 import de.anomic.htmlFilter.htmlFilterOutputStream;
+import de.anomic.http.httpc;
 import de.anomic.plasma.parser.Parser;
 import de.anomic.plasma.parser.ParserException;
+import de.anomic.plasma.parser.ParserInfo;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.logging.serverLog;

@ -92,7 +93,7 @@ public final class plasmaParser {
     * @see #loadEnabledParserList()
     * @see #setEnabledParserList(Enumeration)
     */
-    private static final Properties enabledParserList = new Properties();    
+    private static final HashSet enabledParserList = new HashSet();    
    
    /**
     * A list of file extensions that are supported by all enabled parsers
@ -104,12 +105,43 @@ public final class plasmaParser {
     * be parsed in realtime.
     */
    private static final HashSet supportedRealtimeFileExt = new HashSet();
+
+    /**
+     * A list of mimeTypes that are generic
+     */
+    private static final HashSet genericMimeTypes = new HashSet();
+    static {
+        genericMimeTypes.add("text/plain");
+        genericMimeTypes.add("text/text");
+        genericMimeTypes.add("text/xml");
+        genericMimeTypes.add("application/xml");
+        genericMimeTypes.add("application/x-xml");        
+        genericMimeTypes.add("application/octet-stream");
+        genericMimeTypes.add("application/zip");
+        genericMimeTypes.add("application/x-zip");
+        genericMimeTypes.add("application/x-zip-compressed");
+        genericMimeTypes.add("application/x-compress");
+        genericMimeTypes.add("application/x-compressed");
+    }
    
    /**
     * A list of mimeTypes that can be parsed in Realtime (on the fly)
     */
    private static final HashSet realtimeParsableMimeTypes = new HashSet();    
    
+    private static final Properties mimeTypeLookupByFileExt = new Properties();
+    static {
+        // loading a list of extensions from file
+        BufferedInputStream bufferedIn = null;
+        try {            
+            mimeTypeLookupByFileExt.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("httpd.mime"))));
+        } catch (IOException e) {
+            System.err.println("ERROR: httpd.mime not found in settings path");
+        } finally {
+            if (bufferedIn != null) try{bufferedIn.close();}catch(Exception e){}
+        }    
+    }
+    
    /**
     * A pool of parsers.
     * @see plasmaParserPool
@ -162,7 +194,7 @@ public final class plasmaParser {
        
        // The maximum number of idle connections connections in the pool
        // 0 = no limit.        
-        config.maxIdle = 10;    
+        config.maxIdle = 5;    
        
        config.whenExhaustedAction = GenericObjectPool.WHEN_EXHAUSTED_BLOCK; 
        config.minEvictableIdleTimeMillis = 30000; 
@ -175,6 +207,8 @@ public final class plasmaParser {
        loadAvailableParserList();      
    }
    
+    private serverLog theLogger = new serverLog("PARSER");
+    
    /**
     * This function is used to initialize the realtimeParsableMimeTypes List.
     * This list contains a list of mimeTypes that can be parsed in realtime by
@ -185,7 +219,7 @@ public final class plasmaParser {
    public static void initRealtimeParsableMimeTypes(String realtimeParsableMimeTypes) {
        LinkedList mimeTypes = new LinkedList();
        if ((realtimeParsableMimeTypes == null) || (realtimeParsableMimeTypes.length() == 0)) {
-            
+            // Nothing to do here
        } else {            
            String[] realtimeParsableMimeTypeList = realtimeParsableMimeTypes.split(",");        
            for (int i = 0; i < realtimeParsableMimeTypeList.length; i++) mimeTypes.add(realtimeParsableMimeTypeList[i].toLowerCase().trim());
@ -280,7 +314,7 @@ public final class plasmaParser {
        }        
        
        synchronized (enabledParserList) { 
-            return enabledParserList.containsKey(mimeType);
+            return enabledParserList.contains(mimeType);
        }
    }
    
@ -302,7 +336,7 @@ public final class plasmaParser {
            
        // termining last position of . in file path
        p = name.lastIndexOf('.');
-        if (p < 0) return name; // seams to be strange, but this is a directory entry or default file (html)
+        if (p < 0) return ""; 
        return name.substring(p + 1);        
    }
    
@ -352,19 +386,8 @@ public final class plasmaParser {
        return ((pos < 0) ? mimeType : mimeType.substring(0, pos));              
    }
    
-    public static String getMimeTypeByFileExt(String fileExt) {
-        // loading a list of extensions from file
-        Properties prop = new Properties();
-        BufferedInputStream bufferedIn = null;
-        try {            
-            prop.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("httpd.mime"))));
-        } catch (IOException e) {
-            System.err.println("ERROR: httpd.mime not found in settings path");
-        } finally {
-            if (bufferedIn != null) try{bufferedIn.close();}catch(Exception e){}
-        }
-        
-        return prop.getProperty(fileExt,"application/octet-stream");
+    public static String getMimeTypeByFileExt(String fileExt) {        
+        return mimeTypeLookupByFileExt.getProperty(fileExt,"application/octet-stream");
    }
    
    public plasmaParser() {
@ -373,7 +396,7 @@ public final class plasmaParser {
    
    public static String[] setEnabledParserList(Set mimeTypeSet) {
        
-        Properties newEnabledParsers = new Properties();
+        HashSet newEnabledParsers = new HashSet();
        HashSet newSupportedFileExt = new HashSet();
        
        if (mimeTypeSet != null) {
@ -384,7 +407,7 @@ public final class plasmaParser {
                    Parser theParser = null;
                    try {
                        // getting the parser
-                        theParser = (Parser) plasmaParser.theParserPool.borrowObject(availableParserList.get(mimeType));
+                        theParser = (Parser) plasmaParser.theParserPool.borrowObject(((ParserInfo)availableParserList.get(mimeType)).parserClassName);
                        
                        // getting a list of mimeTypes that the parser supports
                        Hashtable parserSupportsMimeTypes = theParser.getSupportedMimeTypes();
@ -397,7 +420,7 @@ public final class plasmaParser {
                                newSupportedFileExt.addAll(Arrays.asList(extArray));
                            }
                        }
-                        newEnabledParsers.put(mimeType,availableParserList.get(mimeType));
+                        newEnabledParsers.add(mimeType);
                        
                    } catch (Exception e) {
                        serverLog.logSevere("PARSER", "error in setEnabledParserList", e);
@ -411,7 +434,7 @@ public final class plasmaParser {
        
        synchronized (enabledParserList) {
            enabledParserList.clear();
-            enabledParserList.putAll(newEnabledParsers);
+            enabledParserList.addAll(newEnabledParsers);
        }
        
        
@ -420,34 +443,18 @@ public final class plasmaParser {
            supportedFileExt.addAll(newSupportedFileExt);
        }

-        return (String[])newEnabledParsers.keySet().toArray(new String[newEnabledParsers.size()]);
+        return (String[])newEnabledParsers.toArray(new String[newEnabledParsers.size()]);
    }
    
-    public Hashtable getEnabledParserList() {
+    public HashSet getEnabledParserList() {
        synchronized (plasmaParser.enabledParserList) {
-            return (Hashtable) plasmaParser.enabledParserList.clone();
+            return (HashSet) plasmaParser.enabledParserList.clone();
 		}        
    }
    
    public Hashtable getAvailableParserList() {
        return plasmaParser.availableParserList;
-    }
-    
-    private static void loadEnabledParserList() {
-        // loading a list of availabe parser from file
-        Properties prop = new Properties();
-        BufferedInputStream bufferedIn = null;
-        try {
-            prop.load(bufferedIn = new BufferedInputStream(new FileInputStream(new File("yacy.parser"))));
-        } catch (IOException e) {
-            System.err.println("ERROR: yacy.parser not found in settings path");
-        } finally {
-            if (bufferedIn != null) try{ bufferedIn.close(); }catch(Exception e){}
-        }
-        
-        // enable them ...
-        setEnabledParserList(prop.keySet());
-    }
+    }    
    
    private static void loadAvailableParserList() {
        try {
@ -474,9 +481,11 @@ public final class plasmaParser {
             */
            File[] parserDirectories = parserDir.listFiles(parserDirectoryFilter);
            if (parserDirectories == null) return;
+            
            for (int parserDirNr=0; parserDirNr< parserDirectories.length; parserDirNr++) {
                File currentDir = parserDirectories[parserDirNr];
                serverLog.logFine("PARSER", "Searching in directory " + currentDir.toString());
+                
                String[] parserClasses = currentDir.list(parserFileNameFilter);
                if (parserClasses == null) continue;
                
@ -506,12 +515,25 @@ public final class plasmaParser {
                        
                        // loading the list of mime-types that are supported by this parser class
                        Hashtable supportedMimeTypes = ((Parser)theParser).getSupportedMimeTypes();
+                        
+                        // creating a parser info object
+                        ParserInfo parserInfo = new ParserInfo();
+                        parserInfo.parserClass = parserClass;
+                        parserInfo.parserClassName = fullClassName;
+                        parserInfo.libxDependencies = neededLibx;
+                        parserInfo.supportedMimeTypes = supportedMimeTypes;
+                        parserInfo.parserVersionNr = ((Parser)theParser).getVersion();
+                        parserInfo.parserName = ((Parser)theParser).getName();
+                        
                        Iterator mimeTypeIterator = supportedMimeTypes.keySet().iterator();
                        while (mimeTypeIterator.hasNext()) {
                            String mimeType = (String) mimeTypeIterator.next();
-                            availableParserList.put(mimeType,fullClassName);
+                            availableParserList.put(mimeType,parserInfo );
                            serverLog.logInfo("PARSER", "Found functional parser for mimeType '" + mimeType + "'." +
-                                              ((neededLibxBuf.length()>0)?"\n   Dependencies: " + neededLibxBuf.toString():""));
+                                              "\n\tName:    " + parserInfo.parserName + 
+                                              "\n\tVersion: " + parserInfo.parserVersionNr + 
+                                              "\n\tClass:   " + parserInfo.parserClassName +
+                                              ((neededLibxBuf.length()>0)?"\n\tDependencies: " + neededLibxBuf.toString():""));
                        }
                        
                    } catch (Exception e) { /* we can ignore this for the moment */
@ -537,50 +559,19 @@ public final class plasmaParser {
        try {
            theParserPool.close();
        } catch (Exception e) { }
-    }
+    }    
    
    public plasmaParserDocument parseSource(URL location, String mimeType, byte[] source) {
-        
-        Parser theParser = null;
+        // Spools the in-memory content into a temporary file and delegates to
+        // parseSource(URL, String, File). Returns null if any step throws.
+        File tempFile = null;
        try {
-            mimeType = getRealMimeType(mimeType);
-            String fileExt = getFileExt(location);
-            
-            // TODO: Handling of not trustable mimeTypes
-            // text/plain, octet-stream
-            if (
-                    (mimeType.equalsIgnoreCase("text/plain") && !fileExt.equalsIgnoreCase("txt")) || 
-                    (mimeType.equalsIgnoreCase("text/xml")   && !fileExt.equalsIgnoreCase("txt"))
-            ) {
-                if (enabledParserList.containsKey("application/octet-stream")) {
-                    mimeType = "application/octet-stream";
-                }
-            }
-            
-            // getting the correct parser for the given mimeType
-            theParser = this.getParser(mimeType);            
-            
-            // if a parser was found we use it ...
-            if (theParser != null) {
-                return theParser.parse(location, mimeType,source);
-            } else if (realtimeParsableMimeTypesContains(mimeType)) {        
-                // ... otherwise we make a html scraper and transformer
-                htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
-                OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);
-                hfos.write(source);
-                hfos.close();
-                return transformScraper(location, mimeType, scraper);
-            } else {
-                return null;
-            }
-        } catch (Exception e) {
-            //e.printStackTrace();
+            tempFile = File.createTempFile("parseSource", ".tmp");
+            // createTempFile only creates an empty file: the content has to be
+            // written into it before the file based parser can see anything
+            serverFileUtils.write(source, tempFile);
+            return parseSource(location, mimeType, tempFile);
+        } catch (Exception e) {   
            return null;
        } finally {
-            if ((theParser != null) && (supportedMimeTypesContains(mimeType))) {
-                try { plasmaParser.theParserPool.returnObject(mimeType, theParser); } catch (Exception e) {}
-            }
+            // best-effort cleanup of the spool file
+            if (tempFile != null) try { tempFile.delete(); } catch (Exception ex){}
        }
+        
    }

    public plasmaParserDocument parseSource(URL location, String mimeType, File sourceFile) {
@ -590,16 +581,58 @@ public final class plasmaParser {
            mimeType = getRealMimeType(mimeType);
            String fileExt = getFileExt(location);
            
-            // TODO: Handling of not trustable mimeTypes
-            // text/plain, octet-stream
-            if (
-                    (mimeType.equalsIgnoreCase("text/plain") && !fileExt.equalsIgnoreCase("txt")) || 
-                    (mimeType.equalsIgnoreCase("text/xml")   && !fileExt.equalsIgnoreCase("txt"))
-            ) {
-                if (enabledParserList.containsKey("application/octet-stream")) {
-                    mimeType = "application/octet-stream";
-                }
-            }          
+            if (this.theLogger.isFine())
+                this.theLogger.logFine("Parsing " + location + " with mimeType '" + mimeType + 
+                                       "' and file extension '" + fileExt + "'.");
+            
+            /*
+             * There are some problematic mimeType - fileExtension combinations where we have to enforce
+             * a mimeType detection to get the proper parser for the content
+             * 
+             * - application/zip + .odt
+             * - text/plain + .odt
+             * - text/plain + .vcf
+             * - text/xml + .rss
+             * - text/xml + .atom
+             * 
+             * In all these cases we can trust the fileExtension and have to determine the proper mimeType.
+             * 
+             */
+            
+//            // Handling of not trustable mimeTypes
+//            // - text/plain
+//            // - text/xml
+//            // - application/octet-stream
+//            // - application/zip
+//            if (
+//                    (mimeType.equalsIgnoreCase("text/plain") && !fileExt.equalsIgnoreCase("txt")) || 
+//                    (mimeType.equalsIgnoreCase("text/xml")   && !fileExt.equalsIgnoreCase("txt")) 
+//            ) {
+//                if (this.theLogger.isFine())
+//                    this.theLogger.logFine("Document " + location + " has an mimeType '" + mimeType + 
+//                                           "' that seems not to be correct for file extension '" + fileExt + "'.");                
+//                
+//                if (enabledParserList.containsKey("application/octet-stream")) {
+//                    theParser = this.getParser("application/octet-stream");
+//                    Object newMime = theParser.getClass().getMethod("getMimeType", new Class[]{File.class}).invoke(theParser, sourceFile);
+//                    if (newMime == null)
+//                    if (newMime instanceof String) {
+//                        String newMimeType = (String)newMime;
+//                        if ((newMimeType.equals("application/octet-stream")) {
+//                            return null;
+//                        }
+//                        mimeType = newMimeType;
+//                    }
+//                } else {
+//                    return null;
+//                }
+//            } else if (mimeType.equalsIgnoreCase("application/zip") && fileExt.equalsIgnoreCase("odt")){
+//                if (enabledParserList.containsKey("application/vnd.oasis.opendocument.text")) {
+//                    mimeType = "application/vnd.oasis.opendocument.text";
+//                } else {
+//                    return null;
+//                }
+//            }        
            
            // getting the correct parser for the given mimeType
            theParser = this.getParser(mimeType);
@ -647,16 +680,18 @@ public final class plasmaParser {
     * @param mimeType
     * @return
     */
-    public Parser getParser(String mimeType) {
+    private Parser getParser(String mimeType) {

        mimeType = getRealMimeType(mimeType);        
        try {
            
            // determining the proper parser class name for the mimeType
            String parserClassName = null;
+            ParserInfo parserInfo = null;
            synchronized (plasmaParser.enabledParserList) {
-    	        if (plasmaParser.enabledParserList.containsKey(mimeType)) {
-    	            parserClassName = (String)plasmaParser.enabledParserList.get(mimeType);
+    	        if (plasmaParser.enabledParserList.contains(mimeType)) {
+                    parserInfo = (ParserInfo)plasmaParser.availableParserList.get(mimeType);
+    	            parserClassName = parserInfo.parserClassName;
    	        } else {
                    return null;
    	        }
@ -668,6 +703,7 @@ public final class plasmaParser {
            // checking if the created parser really supports the given mimetype 
            Hashtable supportedMimeTypes = theParser.getSupportedMimeTypes();
            if ((supportedMimeTypes != null) && (supportedMimeTypes.containsKey(mimeType))) {
+                parserInfo.incUsageCounter();
 				return theParser;
            }
            theParserPool.returnObject(parserClassName,theParser);
@ -740,10 +776,40 @@ public final class plasmaParser {
        //javac -classpath lib/commons-collections.jar:lib/commons-pool-1.2.jar -sourcepath source source/de/anomic/plasma/plasmaParser.java
        //java -cp source:lib/commons-collections.jar:lib/commons-pool-1.2.jar de.anomic.plasma.plasmaParser bug.html bug.out
        try {
-            File in = new File(args[0]);
-            //File out = new File(args[1]);
+            File contentFile = null;
+            URL contentURL = null;
+            String contentMimeType = "application/octet-stream";
+            
+            if (args.length < 2) {
+                System.err.println("Usage: java de.anomic.plasma.plasmaParser (-f filename|-u URL) [-m mimeType]");
+            }            
+                        
+            String mode = args[0];
+            if (mode.equalsIgnoreCase("-f")) {
+                contentFile = new File(args[1]);
+                contentURL = contentFile.toURL();
+            } else if (mode.equalsIgnoreCase("-u")) {
+                contentURL = new URL(args[1]);
+                
+                // downloading the document content
+                byte[] contentBytes = httpc.singleGET(contentURL, 10000, null, null, null);
+                
+                contentFile = File.createTempFile("content",".tmp");
+                contentFile.deleteOnExit();
+                serverFileUtils.write(contentBytes, contentFile);
+            }
+            
+            if ((args.length == 4)&&(args[2].equalsIgnoreCase("-m"))) {
+                contentMimeType = args[3];
+            }
+            
+            // creating a plasma parser
            plasmaParser theParser = new plasmaParser();
+            
+            // configuring the realtime parsable mimeTypes
            plasmaParser.initRealtimeParsableMimeTypes("application/xhtml+xml,text/html,text/plain");
+            
+            // configure all other supported mimeTypes
            plasmaParser.initParseableMimeTypes(
                    "application/atom+xml," +
                    "application/gzip," +
@ -763,14 +829,14 @@ public final class plasmaParser {
                    "text/xml," +
                    "application/x-bzip2," +
                    "application/postscript," +
-                    "text/x-vcard");
-            FileInputStream theInput = new FileInputStream(in);
-            ByteArrayOutputStream theOutput = new ByteArrayOutputStream();
-            serverFileUtils.copy(theInput, theOutput);
-            plasmaParserDocument document = theParser.parseSource(new URL("http://brain/~theli/test.ps"), null, theOutput.toByteArray());
-            //plasmaParserDocument document = theParser.parseSource(new URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray());
-            //byte[] theText = document.getText();
-            //serverFileUtils.write(theText, out);
+                    "text/x-vcard," + 
+                    "application/vnd.oasis.opendocument.text," + 
+                    "application/x-vnd.oasis.opendocument.text");
+
+            // parsing the content
+            plasmaParserDocument document = theParser.parseSource(contentURL, contentMimeType, contentFile);
+
+            // printing out all parsed sentences
            if (document != null) {
                String[] sentences = document.getSentences();
                if (sentences != null) for (int i = 0; i < sentences.length; i++) System.out.println("line " + i + ":" + sentences[i]);
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@ -327,8 +327,9 @@ public class plasmaSnippetCache {
        if (header == null) {
            String filename = this.cacheManager.getCachePath(url).getName();
            int p = filename.lastIndexOf('.');
-            if (
+            if (    // if no extension is available
                    (p < 0) ||
+                    // or the extension is supported by one of the parsers
                    ((p >= 0) && (plasmaParser.supportedFileExtContains(filename.substring(p + 1))))
            ) {
                String supposedMime = "text/html";
--- a/source/de/anomic/server/logging/serverLog.java
+++ b/source/de/anomic/server/logging/serverLog.java
@ -81,69 +81,98 @@ public final class serverLog {
    public void setLevel(Level newLevel) {
        this.theLogger.setLevel(newLevel);
    }
-
+    
    public void logSevere(String message) {this.theLogger.severe(message);}
    public void logSevere(String message, Throwable thrown) {this.theLogger.log(Level.SEVERE,message,thrown);}
+    /** Reports whether this instance's logger would currently log a {@link Level#SEVERE} message. */
+    public boolean isSevere() { return this.theLogger.isLoggable(Level.SEVERE); }

    public void logWarning(String message) {this.theLogger.warning(message);}
    public void logWarning(String message, Throwable thrown) {this.theLogger.log(Level.WARNING,message,thrown);}
-
+    /** Reports whether this instance's logger would currently log a {@link Level#WARNING} message. */
+    public boolean isWarning() { return this.theLogger.isLoggable(Level.WARNING); }
+    
    public void logConfig(String message) {this.theLogger.config(message);}
    public void logConfig(String message, Throwable thrown) {this.theLogger.log(Level.CONFIG,message,thrown);}
+    /** Reports whether this instance's logger would currently log a {@link Level#CONFIG} message. */
+    public boolean isConfig() { return this.theLogger.isLoggable(Level.CONFIG); }

    public void logInfo(String message) {this.theLogger.info(message);}
    public void logInfo(String message, Throwable thrown) {this.theLogger.log(Level.INFO,message,thrown);}
+    /** Reports whether this instance's logger would currently log a {@link Level#INFO} message. */
+    public boolean isInfo() { return this.theLogger.isLoggable(Level.INFO); }

    public void logFine(String message) {this.theLogger.fine(message);}
    public void logFine(String message, Throwable thrown) {this.theLogger.log(Level.FINE,message,thrown);}
+    /** Reports whether this instance's logger would currently log a {@link Level#FINE} message. */
+    public boolean isFine() { return this.theLogger.isLoggable(Level.FINE); }

    public void logFiner(String message) {this.theLogger.finer(message);}
-    public void logFiner(String message, Throwable thrown) {this.theLogger.log(Level.FINER,message,thrown);}    
+    public void logFiner(String message, Throwable thrown) {this.theLogger.log(Level.FINER,message,thrown);}   
+    /** Reports whether this instance's logger would currently log a {@link Level#FINER} message. */
+    public boolean isFiner() { return this.theLogger.isLoggable(Level.FINER); }
    
    public void logFinest(String message) {this.theLogger.finest(message);}
-    public void logFinest(String message, Throwable thrown) {this.theLogger.log(Level.FINEST,message,thrown);}      
+    public void logFinest(String message, Throwable thrown) {this.theLogger.log(Level.FINEST,message,thrown);} 
+    /** Reports whether this instance's logger would currently log a {@link Level#FINEST} message. */
+    public boolean isFinest() { return this.theLogger.isLoggable(Level.FINEST); }
+    
+    // Forwards the message and its associated Throwable to this instance's logger at the given level.
+    private void log(Level level, String msg, Throwable thrown) {
+        this.theLogger.log(level, msg, thrown);
+    }    
    
    public boolean isLoggable(Level level) {
        return this.theLogger.isLoggable(level);
    }
    
+    
    // static log messages: log everything
    private static void log(String appName, int messageLevel, String message) {
        Logger.getLogger(appName).log(Level.parse(Integer.toString(messageLevel)),message);
    }
-    private void log(Level level, String msg, Throwable thrown) {
-        this.theLogger.log(level, msg, thrown);
-    }
+    
    public static void logSevere(String appName, String message) {
        Logger.getLogger(appName).severe(message);
    }
    public static void logSevere(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.SEVERE,message,thrown);
    }
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#SEVERE}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if SEVERE messages are loggable for that logger
+     */
+    public static boolean isSevere(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.SEVERE);
+    }
+    
    public static void logWarning(String appName, String message) {
        Logger.getLogger(appName).warning(message);
    }
    public static void logWarning(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.WARNING,message,thrown);
    }
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#WARNING}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if WARNING messages are loggable for that logger
+     */
+    public static boolean isWarning(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.WARNING);
+    }
+    
    public static void logConfig(String appName, String message) {
        Logger.getLogger(appName).config(message);
    }
    public static void logConfig(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.CONFIG,message,thrown);
    }    
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#CONFIG}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if CONFIG messages are loggable for that logger
+     */
+    public static boolean isConfig(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.CONFIG);
+    }
+    
    public static void logInfo(String appName, String message) {
        Logger.getLogger(appName).info(message);
    }
    public static void logInfo(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.INFO,message,thrown);
    }
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#INFO}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if INFO messages are loggable for that logger
+     */
+    public static boolean isInfo(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.INFO);
+    }
+    
    public static void logFine(String appName, String message) {
        Logger.getLogger(appName).fine(message);
    }
    public static void logFine(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.FINE,message,thrown);
    }
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#FINE}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if FINE messages are loggable for that logger
+     */
+    public static boolean isFine(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.FINE);
+    }
    
    public static void logFiner(String appName, String message) {
        Logger.getLogger(appName).finer(message);
@ -151,6 +180,9 @@ public final class serverLog {
    public static void logFiner(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.FINER,message,thrown);
    }
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#FINER}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if FINER messages are loggable for that logger
+     */
+    public static boolean isFiner(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.FINER);
+    }
    
    public static void logFinest(String appName, String message) {
        Logger.getLogger(appName).finest(message);
@ -158,6 +190,9 @@ public final class serverLog {
    public static void logFinest(String appName, String message, Throwable thrown) {
        Logger.getLogger(appName).log(Level.FINEST,message,thrown);
    }    
+    /**
+     * Reports whether the logger registered under the given name would currently
+     * log a message at {@link Level#FINEST}.
+     *
+     * @param appName name of the logger to query
+     * @return <code>true</code> if FINEST messages are loggable for that logger
+     */
+    public static boolean isFinest(String appName) {
+        // the result must be returned: a void method here makes the query useless
+        return Logger.getLogger(appName).isLoggable(Level.FINEST);
+    }
    
    public static final void configureLogging(File loggingConfigFile) throws SecurityException, FileNotFoundException, IOException {
        FileInputStream fileIn = null;