From c5d30209413350be0bf106c26dbba135ebd4bd0b Mon Sep 17 00:00:00 2001
From: theli
Date: Fri, 15 Sep 2006 12:56:01 +0000
Subject: [PATCH] *) better errorhandling for last commit

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2592 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../anomic/htmlFilter/htmlFilterContentScraper.java |  2 ++
 source/de/anomic/plasma/plasmaParser.java           | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 9fb48ffd2..995395a6e 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -114,6 +114,8 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
     }
 
     public void setCharset(String charset) throws UnsupportedCharsetException {
+        if (charset == null) return;
+
         // testing if charset exists
         Charset.forName(charset);
 
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index b97b68aa5..6102f8766 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -53,6 +53,8 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.net.MalformedURLException;
 import java.net.URI;
+import java.nio.charset.UnsupportedCharsetException;
+
 import de.anomic.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -550,7 +552,16 @@ public final class plasmaParser {
             } else if (realtimeParsableMimeTypesContains(mimeType)) {
                 // ...otherwise we make a scraper and transformer
                 htmlFilterContentScraper scraper = new htmlFilterContentScraper(location);
-                scraper.setCharset(PARSER_MODE_URLREDIRECTOR);
+
+                // set the charset if known
+                if (charset != null) {
+                    try {
+                        scraper.setCharset(charset);
+                    } catch (UnsupportedCharsetException e) {
+                        serverLog.logWarning("PARSER", "parseSource2: unknown or unsupported charset '" + charset + "'");
+                        return null;
+                    }
+                }
 
                 OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);
                 serverFileUtils.copy(sourceFile, hfos);