Do locale neutral case conversions in Classification

Required for users whose default system locale is Turkish: in that
locale the 'i' character has different upper- and lower-case forms
than in other locales.
pull/144/head
luccioman 7 years ago
parent bab5f0485f
commit 9531b83598

@ -24,6 +24,7 @@ import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
@ -105,47 +106,47 @@ public class Classification {
/**
 * Splits an extension list with CommonPattern.COMMA (presumably a comma
 * separator — confirm in CommonPattern) and adds every entry, lower-cased
 * and trimmed, to the given set. Returns without touching the set when the
 * list is null or empty.
 *
 * @param set       set that receives the normalized entries
 * @param extString extension list to split; may be null or empty
 */
private static void addSet(Set<String> set, final String extString) {
if ((extString == null) || (extString.isEmpty())) return;
// Pre-change line shown by the diff: lower-cases with the default locale.
for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase().trim());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
for (String s: CommonPattern.COMMA.split(extString, 0)) set.add(s.toLowerCase(Locale.ROOT).trim());
}
/**
 * Tells whether textExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param textExt file extension to look up; may be null
 * @return false when textExt is null, otherwise the result of the set lookup
 */
public static boolean isTextExtension(String textExt) {
if (textExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return textExtSet.contains(textExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return textExtSet.contains(textExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether mediaExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param mediaExt file extension to look up; may be null
 * @return false when mediaExt is null, otherwise the result of the set lookup
 */
public static boolean isMediaExtension(String mediaExt) {
if (mediaExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return mediaExtSet.contains(mediaExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return mediaExtSet.contains(mediaExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether imageExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param imageExt file extension to look up; may be null
 * @return false when imageExt is null, otherwise the result of the set lookup
 */
public static boolean isImageExtension(final String imageExt) {
if (imageExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return imageExtSet.contains(imageExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return imageExtSet.contains(imageExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether audioExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param audioExt file extension to look up; may be null
 * @return false when audioExt is null, otherwise the result of the set lookup
 */
public static boolean isAudioExtension(final String audioExt) {
if (audioExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return audioExtSet.contains(audioExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return audioExtSet.contains(audioExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether videoExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param videoExt file extension to look up; may be null
 * @return false when videoExt is null, otherwise the result of the set lookup
 */
public static boolean isVideoExtension(final String videoExt) {
if (videoExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return videoExtSet.contains(videoExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return videoExtSet.contains(videoExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether appsExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param appsExt file extension to look up; may be null
 * @return false when appsExt is null, otherwise the result of the set lookup
 */
public static boolean isApplicationExtension(final String appsExt) {
if (appsExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return appsExtSet.contains(appsExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return appsExtSet.contains(appsExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether ctrlExtSet contains the given extension, compared after
 * trimming and lower-casing.
 *
 * @param ctrlExt file extension to look up; may be null
 * @return false when ctrlExt is null, otherwise the result of the set lookup
 */
public static boolean isControlExtension(final String ctrlExt) {
if (ctrlExt == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
return ctrlExtSet.contains(ctrlExt.trim().toLowerCase());
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
return ctrlExtSet.contains(ctrlExt.trim().toLowerCase(Locale.ROOT));
}
/**
 * Tells whether the given extension, trimmed and lower-cased, is contained in
 * textExtSet, mediaExtSet or ctrlExtSet.
 *
 * NOTE(review): appsExtSet is not consulted here — confirm whether that
 * omission is intentional.
 *
 * @param ext file extension to look up; may be null
 * @return false when ext is null, otherwise true if any of the three sets contains it
 */
public static boolean isAnyKnownExtension(String ext) {
if (ext == null) return false;
// Pre-change line shown by the diff: lower-cases with the default locale.
ext = ext.trim().toLowerCase();
// Replacement line: lower-cases with Locale.ROOT, independent of the default locale.
ext = ext.trim().toLowerCase(Locale.ROOT);
return textExtSet.contains(ext) || mediaExtSet.contains(ext) || ctrlExtSet.contains(ext);
}
@ -182,7 +183,7 @@ public class Classification {
/**
 * Tells whether a media type string starts with "image", ignoring letter case.
 * Only the prefix is checked; no "/" separator or subtype is required.
 *
 * @param mimeType media type string; may be null
 * @return false when mimeType is null, otherwise whether its upper-cased form starts with "IMAGE"
 */
public static boolean isPictureMime(final String mimeType) {
if (mimeType == null) return false;
// Pre-change line shown by the diff: upper-cases with the default locale.
return mimeType.toUpperCase().startsWith("IMAGE");
// Replacement line: upper-cases with Locale.ROOT, independent of the default locale.
return mimeType.toUpperCase(Locale.ROOT).startsWith("IMAGE");
}
// Lookup table keyed by file extension with the media type as value (see the ext2mime methods).
private static final Properties mimeTable = new Properties();
@ -202,10 +203,10 @@ public class Classification {
for (Entry<Object, Object> entry: mimeTable.entrySet()) {
String ext = (String) entry.getKey();
String mime = (String) entry.getValue();
if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase());
if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase());
if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase());
if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase());
if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase(Locale.ROOT));
if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase(Locale.ROOT));
}
}
@ -214,11 +215,11 @@ public class Classification {
}
/**
 * Looks up the media type registered in mimeTable for a file extension.
 * The lookup key is the lower-cased extension; the fallback value is built
 * from the extension exactly as passed in (not lower-cased).
 *
 * NOTE(review): the inner "ext == null" test is redundant, the outer ternary
 * has already handled null.
 *
 * @param ext file extension; may be null
 * @return "application/octet-stream" when ext is null; otherwise the mimeTable
 *         entry, or when there is none "application/" followed by ext
 *         ("application/octet-stream" for an empty ext)
 */
public static String ext2mime(final String ext) {
// Pre-change line shown by the diff: lower-cases the key with the default locale.
return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext));
// Replacement line: lower-cases the key with Locale.ROOT, independent of the default locale.
return ext == null ? "application/octet-stream" : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), "application/" + (ext == null || ext.length() == 0 ? "octet-stream" : ext));
}
/**
 * Looks up the media type registered in mimeTable for a file extension,
 * falling back to a caller-supplied default.
 *
 * @param ext      file extension; may be null
 * @param dfltMime value returned when ext is null or has no mimeTable entry
 * @return the mimeTable entry for the lower-cased extension, or dfltMime
 */
public static String ext2mime(final String ext, final String dfltMime) {
// Pre-change line shown by the diff: lower-cases the key with the default locale.
return ext == null ? dfltMime : mimeTable.getProperty(ext.toLowerCase(), dfltMime);
// Replacement line: lower-cases the key with Locale.ROOT, independent of the default locale.
return ext == null ? dfltMime : mimeTable.getProperty(ext.toLowerCase(Locale.ROOT), dfltMime);
}
public static String url2mime(final MultiProtocolURL url, final String dfltMime) {

@ -20,6 +20,7 @@
package net.yacy.cora.document.analysis;
import java.io.File;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
@ -42,9 +43,55 @@ public class ClassificationTest {
*/
@Test
public void testExt2mime_String() {
// NOTE(review): the next three statements look like the pre-change lines of this
// diff, superseded by the assertions that follow — confirm against the commit.
String mime;
mime = Classification.ext2mime("Z");
assertEquals("application/x-compress", mime);
// Upper- and lower-case spellings of an extension must resolve to the same media type.
assertEquals("application/x-compress", Classification.ext2mime("Z"));
assertEquals("application/x-compress", Classification.ext2mime("z"));
assertEquals("image/tiff", Classification.ext2mime("TIFF"));
assertEquals("image/tiff", Classification.ext2mime("tiff"));
// NOTE(review): in the two-argument calls the default equals the expected value,
// so these assertions also pass when the table lookup misses.
assertEquals("image/tiff", Classification.ext2mime("TIFF", "image/tiff"));
assertEquals("image/tiff", Classification.ext2mime("tiff", "image/tiff"));
}
/**
 * Test of the isNNNExtension methods with lower and upper case samples, notably
 * containing the 'i' character, whose case conversion is different with the
 * Turkish locale. This test must be successful with any default system locale.
 */
@Test
public void testIsExtension() {
assertTrue(Classification.isApplicationExtension("ISO"));
assertTrue(Classification.isApplicationExtension("iso"));
assertTrue(Classification.isAudioExtension("AIF"));
assertTrue(Classification.isAudioExtension("aif"));
assertTrue(Classification.isVideoExtension("AVI"));
assertTrue(Classification.isVideoExtension("avi"));
assertTrue(Classification.isImageExtension("GIF"));
assertTrue(Classification.isImageExtension("gif"));
assertTrue(Classification.isControlExtension("SHA1"));
assertTrue(Classification.isControlExtension("sha1"));
assertTrue(Classification.isMediaExtension("GIF"));
assertTrue(Classification.isMediaExtension("gif"));
assertTrue(Classification.isAnyKnownExtension("GIF"));
assertTrue(Classification.isAnyKnownExtension("gif"));
}
/**
 * Test of the isPictureMime method with sample media types in lower and upper
 * case: image types must be accepted and a non-image type rejected in both
 * spellings.
 */
@Test
public void testIsPictureMime() {
assertTrue(Classification.isPictureMime("image/jpeg"));
assertTrue(Classification.isPictureMime("IMAGE/JPEG"));
assertFalse(Classification.isPictureMime("text/html"));
assertFalse(Classification.isPictureMime("TEXT/HTML"));
}
}

Loading…
Cancel
Save