diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index 9c3889a7b..d5985a533 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -148,14 +148,14 @@ public class rssParser extends AbstractParser implements Parser { anchors.put(itemURL.toString(),itemTitle); if ((text.length() != 0) && (text.byteAt(text.length() - 1) != 32)) text.append((byte) 32); - text.append(new serverByteBuffer(htmlFilterAbstractScraper.stripAll(new serverByteBuffer(itemDescr.getBytes()))).trim()).append((byte) ' '); + text.append(new serverByteBuffer(htmlFilterAbstractScraper.stripAll(new serverByteBuffer(itemDescr.getBytes("UTF-8")))).trim()).append((byte) ' '); // TODO: this does not work for utf-8 String itemContent = item.getElementValue("content"); if ((itemContent != null) && (itemContent.length() > 0)) { htmlFilterContentScraper scraper = new htmlFilterContentScraper(itemURL); OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); - serverFileUtils.copy(new ByteArrayInputStream(itemContent.getBytes()), os); + serverFileUtils.copy(new ByteArrayInputStream(itemContent.getBytes("UTF-8")), os); String itemHeadline = scraper.getTitle(); if ((itemHeadline != null) && (itemHeadline.length() > 0)) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 404612d39..908c6b6e2 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -146,6 +146,7 @@ import de.anomic.kelondro.kelondroMapTable; import de.anomic.plasma.dbImport.dbImportManager; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverAbstractSwitch; +import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCodings; import de.anomic.server.serverDate; import de.anomic.server.serverInstantThread; @@ -1700,6 +1701,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser storageEndTime = System.currentTimeMillis(); if (log.isInfo()) { + // TODO: UTF-8 docDescription seems not to be displayed correctly because + // of string concatenation log.logInfo("*Indexed " + words + " words in URL " + entry.url() + " [" + entry.urlHash() + "]" + "\n\tDescription: " + docDescription +