From decb09df6d180378aca2a803b0dae2c2d3cbfc85 Mon Sep 17 00:00:00 2001 From: theli Date: Fri, 13 Oct 2006 05:30:20 +0000 Subject: [PATCH] *) Trying to be more tolerant against wrong charset names git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2760 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaParser.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 14763fc58..0dd68265f 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -352,6 +352,18 @@ public final class plasmaParser { else if ((c >= '0') && (c <= '9')) encoding = "windows-" + encoding.substring(7); } + if (encoding.toLowerCase().startsWith("iso") && encoding.length() > 3) { + char c = encoding.charAt(3); + if (c == '_') encoding = "ISO-" + encoding.substring(4); + else if ((c >= '0') && (c <= '9')) encoding = "ISO-" + encoding.substring(3); + } + + if (encoding.toLowerCase().startsWith("iso") && encoding.length() > 8) { + char c = encoding.charAt(8); + if (c == '_') encoding = encoding.substring(0,8) + "-" + encoding.substring(9); + else if ((c >= '0') && (c <= '9')) encoding = encoding.substring(0,8) + "-" + encoding.substring(8); + } + return encoding; }