diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 56bdee44d..ecf38c2f0 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -214,22 +214,22 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
}
String h;
if ((tagname.equalsIgnoreCase("h1")) && (text.length < 1024)) {
- h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
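+ h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8: toString() is called without an explicit charset, so the text is presumably not decoded as UTF-8 (confirm in serverByteBuffer)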
if (h.length() > 0) headlines[0].add(h);
}
if ((tagname.equalsIgnoreCase("h2")) && (text.length < 1024)) {
- h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
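+ h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8: toString() is called without an explicit charset, so the text is presumably not decoded as UTF-8 (confirm in serverByteBuffer)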
if (h.length() > 0) headlines[1].add(h);
}
if ((tagname.equalsIgnoreCase("h3")) && (text.length < 1024)) {
- h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
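+ h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8: toString() is called without an explicit charset, so the text is presumably not decoded as UTF-8 (confirm in serverByteBuffer)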
if (h.length() > 0) headlines[2].add(h);
}
if ((tagname.equalsIgnoreCase("h4")) && (text.length < 1024)) {
- h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
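+ h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8: toString() is called without an explicit charset, so the text is presumably not decoded as UTF-8 (confirm in serverByteBuffer)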
if (h.length() > 0) headlines[3].add(h);
}
- if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) title = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
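+ if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) title = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8: toString() is called without an explicit charset, so the title is presumably not decoded as UTF-8 (confirm in serverByteBuffer)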
}
private static String cleanLine(String s) {
diff --git a/source/de/anomic/htmlFilter/htmlFilterOutputStream.java b/source/de/anomic/htmlFilter/htmlFilterOutputStream.java
index 8a60a01ac..0a679f0ed 100644
--- a/source/de/anomic/htmlFilter/htmlFilterOutputStream.java
+++ b/source/de/anomic/htmlFilter/htmlFilterOutputStream.java
@@ -58,6 +58,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.Enumeration;
import java.util.Properties;
@@ -157,7 +158,13 @@ public final class htmlFilterOutputStream extends OutputStream {
while (e.hasMoreElements()) {
key = (String) e.nextElement();
bb = bb.append((byte) 32).append(key.getBytes()).append((byte) '=');
- bb = bb.append(quotechar).append(prop.getProperty(key).getBytes()).append(quotechar);
+ bb = bb.append(quotechar);
+ try {
+ bb.append(prop.getProperty(key).getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e1) {
+ bb.append(prop.getProperty(key).getBytes());
+ }
+ bb.append(quotechar);
}
if (bb.length() > 0) return bb.getBytes(1);
return bb.getBytes();
diff --git a/source/de/anomic/server/serverByteBuffer.java b/source/de/anomic/server/serverByteBuffer.java
index 64a17898b..0943327df 100644
--- a/source/de/anomic/server/serverByteBuffer.java
+++ b/source/de/anomic/server/serverByteBuffer.java
@@ -45,6 +45,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
import java.util.Properties;
public final class serverByteBuffer extends OutputStream {
@@ -379,7 +380,11 @@ public final class serverByteBuffer extends OutputStream {
start = pos;
while ((pos < length) && (buffer[pos] != doublequote)) pos++;
if (pos >= length) break; // this is the case if we found no parent doublequote
- p.setProperty(key, new String(buffer, start, pos - start).trim());
+ try {
+ p.setProperty(key, new String(buffer, start, pos - start,"UTF-8").trim());
+ } catch (UnsupportedEncodingException e) {
+ p.setProperty(key, new String(buffer, start, pos - start).trim());
+ }
pos++;
} else if (buffer[pos] == singlequote) {
// search next singlequote