*) Better error handling for charset encoding problems during content parsing

See: http://www.yacy-forum.de/viewtopic.php?t=2952

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2737 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent 89ee215ff0
commit 22649408ad

@ -86,6 +86,7 @@ public class plasmaCrawlEURL extends indexURL {
// wrong content: reason strings recorded when a URL is rejected because of its content or HTTP response
public static final String DENIED_WRONG_MIMETYPE_OR_EXT = "denied_(wrong_mimetype_or_extension)";
// passed to ParserException by plasmaParser when parsing fails with an UnsupportedEncodingException
public static final String DENIED_UNSUPPORTED_CHARSET = "denied_(unsupported_charset)";
public static final String DENIED_REDIRECTION_HEADER_EMPTY = "denied_(redirection_header_empty)";
public static final String DENIED_REDIRECTION_COUNTER_EXCEEDED = "denied_(redirection_counter_exceeded)";
// NOTE(review): this literal ends with "_" and has no closing ")" - presumably callers append the
// status code and the closing parenthesis; confirm against the usages before changing it
public static final String DENIED_WRONG_HTTP_STATUSCODE = "denied_(wrong_http_status_code_";

@ -52,6 +52,7 @@ import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.Arrays;
@ -600,6 +601,10 @@ public final class plasmaParser {
}
return doc;
} catch (UnsupportedEncodingException e) {
String errorMsg = "Unsupported charset encoding: " + e.getMessage();
this.theLogger.logSevere("Unable to parse '" + location + "'. " + errorMsg, e);
throw new ParserException(errorMsg,location,plasmaCrawlEURL.DENIED_UNSUPPORTED_CHARSET);
} catch (Exception e) {
// Interrupted- and Parser-Exceptions should pass through
if (e instanceof InterruptedException) throw (InterruptedException) e;

Loading…
Cancel
Save