diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index 8d9918354..f53dd0f55 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -86,6 +86,7 @@ public class plasmaCrawlEURL extends indexURL { // wrong content public static final String DENIED_WRONG_MIMETYPE_OR_EXT = "denied_(wrong_mimetype_or_extension)"; + public static final String DENIED_UNSUPPORTED_CHARSET = "denied_(unsupported_charset)"; public static final String DENIED_REDIRECTION_HEADER_EMPTY = "denied_(redirection_header_empty)"; public static final String DENIED_REDIRECTION_COUNTER_EXCEEDED = "denied_(redirection_counter_exceeded)"; public static final String DENIED_WRONG_HTTP_STATUSCODE = "denied_(wrong_http_status_code_"; diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 81404dc06..c11984b29 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -52,6 +52,7 @@ import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.util.Arrays; @@ -600,6 +601,10 @@ public final class plasmaParser { } return doc; + } catch (UnsupportedEncodingException e) { + String errorMsg = "Unsupported charset encoding: " + e.getMessage(); + this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e); + throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_UNSUPPORTED_CHARSET); } catch (Exception e) { // Interrupted- and Parser-Exceptions should pass through if (e instanceof InterruptedException) throw (InterruptedException) e;