diff --git a/source/net/yacy/cora/document/analysis/Classification.java b/source/net/yacy/cora/document/analysis/Classification.java index ba0e493ab..ba3919c49 100644 --- a/source/net/yacy/cora/document/analysis/Classification.java +++ b/source/net/yacy/cora/document/analysis/Classification.java @@ -24,6 +24,7 @@ import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.util.HashSet; +import java.util.Locale; import java.util.Map.Entry; import java.util.Properties; import java.util.Set; @@ -105,47 +106,47 @@ public class Classification { private static void addSet(Set set, final String extString) { if ((extString == null) || (extString.isEmpty())) return; - for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase().trim()); + for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase(Locale.ROOT).trim()); } public static boolean isTextExtension(String textExt) { if (textExt == null) return false; - return textExtSet.contains(textExt.trim().toLowerCase()); + return textExtSet.contains(textExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isMediaExtension(String mediaExt) { if (mediaExt == null) return false; - return mediaExtSet.contains(mediaExt.trim().toLowerCase()); + return mediaExtSet.contains(mediaExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isImageExtension(final String imageExt) { if (imageExt == null) return false; - return imageExtSet.contains(imageExt.trim().toLowerCase()); + return imageExtSet.contains(imageExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isAudioExtension(final String audioExt) { if (audioExt == null) return false; - return audioExtSet.contains(audioExt.trim().toLowerCase()); + return audioExtSet.contains(audioExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isVideoExtension(final String videoExt) { if (videoExt == null) return false; - return videoExtSet.contains(videoExt.trim().toLowerCase()); + 
return videoExtSet.contains(videoExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isApplicationExtension(final String appsExt) { if (appsExt == null) return false; - return appsExtSet.contains(appsExt.trim().toLowerCase()); + return appsExtSet.contains(appsExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isControlExtension(final String ctrlExt) { if (ctrlExt == null) return false; - return ctrlExtSet.contains(ctrlExt.trim().toLowerCase()); + return ctrlExtSet.contains(ctrlExt.trim().toLowerCase(Locale.ROOT)); } public static boolean isAnyKnownExtension(String ext) { if (ext == null) return false; - ext = ext.trim().toLowerCase(); + ext = ext.trim().toLowerCase(Locale.ROOT); return textExtSet.contains(ext) || mediaExtSet.contains(ext) || ctrlExtSet.contains(ext); } @@ -182,7 +183,7 @@ public class Classification { public static boolean isPictureMime(final String mimeType) { if (mimeType == null) return false; - return mimeType.toUpperCase().startsWith("IMAGE"); + return mimeType.toUpperCase(Locale.ROOT).startsWith("IMAGE"); } private static final Properties mimeTable = new Properties(); @@ -202,10 +203,10 @@ public class Classification { for (Entry entry: mimeTable.entrySet()) { String ext = (String) entry.getKey(); String mime = (String) entry.getValue(); - if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase()); - if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase()); - if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase()); - if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase()); + if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase(Locale.ROOT)); + if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase(Locale.ROOT)); } } @@ -214,11 +215,11 @@ public class Classification { } public static String ext2mime(final 
String ext) { - return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext)); + return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext)); } public static String ext2mime(final String ext, final String dfltMime) { - return ext == null ? dfltMime : mimeTable.getProperty(ext.toLowerCase(), dfltMime); + return ext == null ? dfltMime : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), dfltMime); } public static String url2mime(final MultiProtocolURL url, final String dfltMime) { diff --git a/test/java/net/yacy/cora/document/analysis/ClassificationTest.java b/test/java/net/yacy/cora/document/analysis/ClassificationTest.java index 670253d2b..b8fe0558c 100644 --- a/test/java/net/yacy/cora/document/analysis/ClassificationTest.java +++ b/test/java/net/yacy/cora/document/analysis/ClassificationTest.java @@ -20,6 +20,7 @@ package net.yacy.cora.document.analysis; import java.io.File; + import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; @@ -42,9 +43,55 @@ public class ClassificationTest { */ @Test public void testExt2mime_String() { - String mime; - mime = Classification.ext2mime("Z"); - assertEquals("application/x-compress", mime); + assertEquals("application/x-compress", Classification.ext2mime("Z")); + assertEquals("application/x-compress", Classification.ext2mime("z")); + + assertEquals("image/tiff", Classification.ext2mime("TIFF")); + assertEquals("image/tiff", Classification.ext2mime("tiff")); + + assertEquals("image/tiff", Classification.ext2mime("TIFF", "image/tiff")); + assertEquals("image/tiff", Classification.ext2mime("tiff", "image/tiff")); + } + + /** + * Test of isNNNExtension methods with lower and upper case samples, containing + * notably the 'i' character whose case conversion is different with the
Turkish + * locale. This test must be successful with any default system locale. + */ + @Test + public void testIsExtension() { + assertTrue(Classification.isApplicationExtension("ISO")); + assertTrue(Classification.isApplicationExtension("iso")); + + assertTrue(Classification.isAudioExtension("AIF")); + assertTrue(Classification.isAudioExtension("aif")); + + assertTrue(Classification.isVideoExtension("AVI")); + assertTrue(Classification.isVideoExtension("avi")); + + assertTrue(Classification.isImageExtension("GIF")); + assertTrue(Classification.isImageExtension("gif")); + + assertTrue(Classification.isControlExtension("SHA1")); + assertTrue(Classification.isControlExtension("sha1")); + + assertTrue(Classification.isMediaExtension("GIF")); + assertTrue(Classification.isMediaExtension("gif")); + + assertTrue(Classification.isAnyKnownExtension("GIF")); + assertTrue(Classification.isAnyKnownExtension("gif")); + } + + /** + * Test of isPictureMime method with some sample media types. + */ + @Test + public void testIsPictureMime() { + assertTrue(Classification.isPictureMime("image/jpeg")); + assertTrue(Classification.isPictureMime("IMAGE/JPEG")); + + assertFalse(Classification.isPictureMime("text/html")); + assertFalse(Classification.isPictureMime("TEXT/HTML")); } }