From 9531b835988f971986b466dbb3e095a4355ec896 Mon Sep 17 00:00:00 2001 From: luccioman Date: Mon, 20 Nov 2017 09:48:46 +0100 Subject: [PATCH] Do locale neutral case conversions in Classification Required for people using Turkish language as their default system locale, as with this locale the 'i' character has different upper and lower case flavors than with other locales. --- .../document/analysis/Classification.java | 33 ++++++------ .../document/analysis/ClassificationTest.java | 53 +++++++++++++++++-- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/source/net/yacy/cora/document/analysis/Classification.java b/source/net/yacy/cora/document/analysis/Classification.java index ba0e493ab..ba3919c49 100644 --- a/source/net/yacy/cora/document/analysis/Classification.java +++ b/source/net/yacy/cora/document/analysis/Classification.java @@ -24,6 +24,7 @@ import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.util.HashSet; +import java.util.Locale; import java.util.Map.Entry; import java.util.Properties; import java.util.Set; @@ -105,47 +106,47 @@ public class Classification { private static void addSet(Set set, final String extString) { if ((extString == null) || (extString.isEmpty())) return; - for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase().trim()); + for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase(Locale.ROOT).trim()); } public static boolean isTextExtension(String textExt) { if (textExt == null) return false; - return textExtSet.contains(textExt.trim().toLowerCase()); + return textExtSet.contains(textExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isMediaExtension(String mediaExt) { if (mediaExt == null) return false; - return mediaExtSet.contains(mediaExt.trim().toLowerCase()); + return mediaExtSet.contains(mediaExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isImageExtension(final String imageExt) { if (imageExt == 
null) return false; - return imageExtSet.contains(imageExt.trim().toLowerCase()); + return imageExtSet.contains(imageExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isAudioExtension(final String audioExt) { if (audioExt == null) return false; - return audioExtSet.contains(audioExt.trim().toLowerCase()); + return audioExtSet.contains(audioExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isVideoExtension(final String videoExt) { if (videoExt == null) return false; - return videoExtSet.contains(videoExt.trim().toLowerCase()); + return videoExtSet.contains(videoExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isApplicationExtension(final String appsExt) { if (appsExt == null) return false; - return appsExtSet.contains(appsExt.trim().toLowerCase()); + return appsExtSet.contains(appsExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isControlExtension(final String ctrlExt) { if (ctrlExt == null) return false; - return ctrlExtSet.contains(ctrlExt.trim().toLowerCase()); + return ctrlExtSet.contains(ctrlExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isAnyKnownExtension(String ext) { if (ext == null) return false; - ext = ext.trim().toLowerCase(); + ext = ext.trim().toLowerCase(Locale.ROOT); return textExtSet.contains(ext) || mediaExtSet.contains(ext) || ctrlExtSet.contains(ext); } @@ -182,7 +183,7 @@ public class Classification { public static boolean isPictureMime(final String mimeType) { if (mimeType == null) return false; - return mimeType.toUpperCase().startsWith("IMAGE"); + return mimeType.toUpperCase(Locale.ROOT).startsWith("IMAGE"); } private static final Properties mimeTable = new Properties(); @@ -202,10 +203,10 @@ public class Classification { for (Entry entry: mimeTable.entrySet()) { String ext = (String) entry.getKey(); String mime = (String) entry.getValue(); - if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase()); - if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase()); - 
if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase()); - if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase()); + if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase(Locale.ROOT)); } } @@ -214,11 +215,11 @@ public class Classification { } public static String ext2mime(final String ext) { - return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext)); + return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext)); } public static String ext2mime(final String ext, final String dfltMime) { - return ext == null ? dfltMime : mimeTable.getProperty(ext.toLowerCase(), dfltMime); + return ext == null ? 
dfltMime : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), dfltMime); } public static String url2mime(final MultiProtocolURL url, final String dfltMime) { diff --git a/test/java/net/yacy/cora/document/analysis/ClassificationTest.java b/test/java/net/yacy/cora/document/analysis/ClassificationTest.java index 670253d2b..b8fe0558c 100644 --- a/test/java/net/yacy/cora/document/analysis/ClassificationTest.java +++ b/test/java/net/yacy/cora/document/analysis/ClassificationTest.java @@ -20,6 +20,7 @@ package net.yacy.cora.document.analysis; import java.io.File; + import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; @@ -42,9 +43,55 @@ public class ClassificationTest { */ @Test public void testExt2mime_String() { - String mime; - mime = Classification.ext2mime("Z"); - assertEquals("application/x-compress", mime); + assertEquals("application/x-compress", Classification.ext2mime("Z")); + assertEquals("application/x-compress", Classification.ext2mime("z")); + + assertEquals("image/tiff", Classification.ext2mime("TIFF")); + assertEquals("image/tiff", Classification.ext2mime("tiff")); + + assertEquals("image/tiff", Classification.ext2mime("TIFF", "image/tiff")); + assertEquals("image/tiff", Classification.ext2mime("tiff", "image/tiff")); + } + + /** + * Test of isNNNExtension methods with lower and upper case samples, containing + * notably the 'i' character, whose case conversion is different with the Turkish + * locale. This test must be successful with any default system locale. 
+ */ + @Test + public void testIsExtension() { + assertTrue(Classification.isApplicationExtension("ISO")); + assertTrue(Classification.isApplicationExtension("iso")); + + assertTrue(Classification.isAudioExtension("AIF")); + assertTrue(Classification.isAudioExtension("aif")); + + assertTrue(Classification.isVideoExtension("AVI")); + assertTrue(Classification.isVideoExtension("avi")); + + assertTrue(Classification.isImageExtension("GIF")); + assertTrue(Classification.isImageExtension("gif")); + + assertTrue(Classification.isControlExtension("SHA1")); + assertTrue(Classification.isControlExtension("sha1")); + + assertTrue(Classification.isMediaExtension("GIF")); + assertTrue(Classification.isMediaExtension("gif")); + + assertTrue(Classification.isAnyKnownExtension("GIF")); + assertTrue(Classification.isAnyKnownExtension("gif")); + } + + /** + * Test of isPictureMime method with some sample media types. + */ + @Test + public void testIsPictureMime() { + assertTrue(Classification.isPictureMime("image/jpeg")); + assertTrue(Classification.isPictureMime("IMAGE/JPEG")); + + assertFalse(Classification.isPictureMime("text/html")); + assertFalse(Classification.isPictureMime("TEXT/HTML")); } }