diff --git a/.classpath b/.classpath
index 5c4063b3c..8cc2580c5 100644
--- a/.classpath
+++ b/.classpath
@@ -41,5 +41,6 @@
 	<classpathentry kind="lib" path="lib/commons-fileupload-1.2.2.jar"/>
 	<classpathentry kind="lib" path="lib/log4j-1.2.16.jar"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
+	<classpathentry kind="lib" path="lib/icu4j-core.jar"/>
 	<classpathentry kind="output" path="gen"/>
 </classpath>
diff --git a/build.xml b/build.xml
index d81a1d039..eb083f4f8 100644
--- a/build.xml
+++ b/build.xml
@@ -192,6 +192,7 @@
         <pathelement location="${lib}/httpclient-4.0.1.jar" />
         <pathelement location="${lib}/httpcore-4.0.1.jar" />
         <pathelement location="${lib}/httpmime-4.0.1.jar" />
+        <pathelement location="${lib}/icu4j-core.jar" />
         <pathelement location="${lib}/J7Zip-modified.jar" />
         <pathelement location="${lib}/jakarta-oro-2.0.8.jar" />
     	<pathelement location="${lib}/jcifs-1.3.14.jar" />
diff --git a/lib/icu4j-core.jar b/lib/icu4j-core.jar
new file mode 100644
index 000000000..b62189c04
Binary files /dev/null and b/lib/icu4j-core.jar differ
diff --git a/lib/icu4j.license b/lib/icu4j.license
new file mode 100644
index 000000000..4879f2b44
--- /dev/null
+++ b/lib/icu4j.license
@@ -0,0 +1,51 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body BGCOLOR="#ffffff">
+<h2>ICU License - ICU 1.8.1 and later</h2>
+
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>
+Copyright (c) 1995-2010 International Business Machines Corporation and others
+</p>
+<p>
+All rights reserved.
+</p>
+<p>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies
+of the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+</p>
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
+OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+</p>
+<p>
+Except as contained in this notice, the name of a copyright holder shall not be
+used in advertising or otherwise to promote the sale, use or other dealings in
+this Software without prior written authorization of the copyright holder.
+</p>
+
+<hr>
+<p><small>
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</small></p>
+</body>
+</html>
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index a7d4c891e..d0946769c 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -20,6 +20,7 @@
 
 package net.yacy.document.parser;
 
+import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -29,6 +30,8 @@ import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.regex.Pattern;
 
+import com.ibm.icu.text.CharsetDetector;
+
 import de.anomic.crawler.retrieval.HTTPLoader;
 
 import net.yacy.cora.document.MultiProtocolURI;
@@ -78,42 +81,56 @@ public class htmlParser extends AbstractParser implements Parser {
     public static ContentScraper parseToScraper(
             final MultiProtocolURI location, 
             final String documentCharset, 
-            final InputStream sourceStream) throws Parser.Failure {
+            InputStream sourceStream) throws Parser.Failure, IOException {
         
         // make a scraper
-        final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
         String charset = null;
 
+        // preferred source: a character encoding was supplied with the document (e.g. via HTTP header)
         if (documentCharset != null) {
             charset = patchCharsetEncoding(documentCharset);
         }
         
+        // no usable encoding so far: let the scraper stream try to detect one (e.g. from a meta tag)
         if (charset == null) {
             try {
+                final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
+                sourceStream = htmlFilter;
                 charset = htmlFilter.detectCharset();
             } catch (IOException e1) {
                 throw new Parser.Failure("Charset error:" + e1.getMessage(), location);
             }
         }
+
+        // no encoding declared by the author: guess it from the content with ICU's CharsetDetector (NOTE(review): detect() may return null - confirm and guard)
+        if (charset == null) {
+        	CharsetDetector det = new CharsetDetector();
+        	det.enableInputFilter(true);
+        	InputStream detStream = new BufferedInputStream(sourceStream);
+        	det.setText(detStream);
+        	charset = det.detect().getName();
+        	sourceStream = detStream;
+        }
         
+        // still nothing: fall back to the platform default charset
         if (charset == null) {
-            charset = patchCharsetEncoding(charset);
+            charset = Charset.defaultCharset().name();
         }
         
         Charset c;
         try {
-            c = Charset.forName(charset);
+        	c = Charset.forName(charset);
         } catch (IllegalCharsetNameException e) {
-            c = Charset.defaultCharset();
+        	c = Charset.defaultCharset();
         } catch (UnsupportedCharsetException e) {
-            c = Charset.defaultCharset();
+        	c = Charset.defaultCharset();
         }
         
         // parsing the content
         final ContentScraper scraper = new ContentScraper(location);        
         final TransformerWriter writer = new TransformerWriter(null,null,scraper,null,false);
         try {
-            FileUtils.copy(htmlFilter, writer, c);
+            FileUtils.copy(sourceStream, writer, c);
             writer.close();
         } catch (IOException e) {
             throw new Parser.Failure("IO error:" + e.getMessage(), location);
@@ -134,7 +151,11 @@ public class htmlParser extends AbstractParser implements Parser {
             final String documentCharset, 
             final InputStream sourceStream) throws Parser.Failure, InterruptedException {
         
-        return transformScraper(location, mimeType, documentCharset, parseToScraper(location, documentCharset, sourceStream));
+        try {
+			return transformScraper(location, mimeType, documentCharset, parseToScraper(location, documentCharset, sourceStream));
+		} catch (IOException e) {
+			throw new Parser.Failure("IOException in htmlParser: " + e.getMessage(), location);
+		}
     }
 
     private static Document[] transformScraper(final MultiProtocolURI location, final String mimeType, final String charSet, final ContentScraper scraper) {
@@ -173,8 +194,8 @@ public class htmlParser extends AbstractParser implements Parser {
      */
     public static String patchCharsetEncoding(String encoding) {
         
-        // return the system default encoding
-        if ((encoding == null) || (encoding.length() < 3)) return Charset.defaultCharset().name();
+        // no usable encoding name (null or shorter than 3 characters): return null
+        if ((encoding == null) || (encoding.length() < 3)) return null;
         
         // trim encoding string
         encoding = encoding.trim();