From 64cec2790d3b0816ad551c4c090f40a6e34c5926 Mon Sep 17 00:00:00 2001 From: luccioman Date: Thu, 22 Jun 2017 10:50:34 +0200 Subject: [PATCH] Improved character encoding detection from Content-Type header Also updated some related JavaDocs --- .../yacy/cora/protocol/HeaderFramework.java | 30 +++++++++++++------ .../cora/protocol/HeaderFrameworkTest.java | 14 ++++++++- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/cora/protocol/HeaderFramework.java b/source/net/yacy/cora/protocol/HeaderFramework.java index f0bf457f0..cf7f073e2 100644 --- a/source/net/yacy/cora/protocol/HeaderFramework.java +++ b/source/net/yacy/cora/protocol/HeaderFramework.java @@ -377,8 +377,11 @@ public class HeaderFramework extends TreeMap implements MapRFC 7231 (HTTP/1.1) - "Media Type" section + * @see RFC 2045 (MIME) - "Content-Type Header Field" section * @return mime or on missing header field "application/octet-stream" */ public String mime() { @@ -386,9 +389,8 @@ public class HeaderFramework extends TreeMap implements Map 0) { return tmpstr.substring(0, pos).trim(); - } else { - return tmpstr; } + return tmpstr; } /* @@ -398,15 +400,25 @@ public class HeaderFramework extends TreeMap implements MapRFC 7231 (HTTP/1.1) - "Media Type" section + * @see RFC 2045 (MIME) - "Content-Type Header Field" section + */ + public static final String getCharacterEncoding(final String contentType) { + if (contentType == null) return null; - final String[] parts = CommonPattern.SEMICOLON.split(mimeType); + final String[] parts = CommonPattern.SEMICOLON.split(contentType); if (parts == null || parts.length <= 1) return null; for (int i=1; i < parts.length; i++) { final String param = parts[i].trim(); - if (param.startsWith("charset=")) { + if (param.toLowerCase(Locale.ROOT).startsWith("charset=")) { String charset = param.substring("charset=".length()).trim(); if (charset.length() > 0 && (charset.charAt(0) == '\"' || charset.charAt(0) == '\'')) charset = charset.substring(1); if (charset.endsWith("\"") || charset.endsWith("'")) charset = charset.substring(0,charset.length()-1); @@ -453,7 +465,7 @@ public class HeaderFramework extends TreeMap implements Map