diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 857e04b47..33acb90c7 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -29,6 +29,7 @@ import java.io.InputStream; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -128,6 +129,15 @@ public final class TextParser { for (Set pl: mime2parser.values()) c.addAll(pl); return c; } + + /** + * @return the set of all supported mime types + */ + public static Set supportedMimeTypes() { + final Set mimeTypes = new HashSet<>(); + mimeTypes.addAll(mime2parser.keySet()); + return mimeTypes; + } private static void initParser(final Parser parser) { String prototypeMime = null; @@ -145,7 +155,7 @@ public final class TextParser { } if (prototypeMime != null) for (String ext: parser.supportedExtensions()) { - ext = ext.toLowerCase(); + ext = ext.toLowerCase(Locale.ROOT); final String s = ext2mime.get(ext); if (s != null && !s.equals(prototypeMime)) AbstractParser.log.info("Parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'."); ext2mime.put(ext, prototypeMime); @@ -153,7 +163,7 @@ public final class TextParser { for (String ext: parser.supportedExtensions()) { // process the extensions - ext = ext.toLowerCase(); + ext = ext.toLowerCase(Locale.ROOT); LinkedHashSet p0 = ext2parser.get(ext); if (p0 == null) { p0 = new LinkedHashSet(); @@ -518,12 +528,12 @@ public final class TextParser { } public static String mimeOf(final String ext) { - return ext2mime.get(ext.toLowerCase()); + return ext2mime.get(ext.toLowerCase(Locale.ROOT)); } private static String normalizeMimeType(String mimeType) { if (mimeType == null) return "application/octet-stream"; - mimeType = mimeType.toLowerCase(); + mimeType = 
mimeType.toLowerCase(Locale.ROOT); final int pos = mimeType.indexOf(';'); return ((pos < 0) ? mimeType.trim() : mimeType.substring(0, pos).trim()); } diff --git a/test/java/net/yacy/document/TextParserTest.java b/test/java/net/yacy/document/TextParserTest.java new file mode 100644 index 000000000..2d874d592 --- /dev/null +++ b/test/java/net/yacy/document/TextParserTest.java @@ -0,0 +1,55 @@ +// TextParserTest.java +// --------------------------- +// Copyright 2017 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.document; + +import static org.junit.Assert.*; + +import java.util.Locale; + +import org.junit.Test; + +/** + * Unit tests for the {@link TextParser} class. + * + * @author luccioman + * + */ +public class TextParserTest { + + /** + * Test the TextParser.supportsMime() consistency with available locales. + * Possible failure case : with the Turkish ("tr") language, 'I' lower cased + * does not become 'i' but '\u005Cu0131' (the Latin small letter 'ı' + * character). 
+ */ + @Test + public void testSupportsMimeLocaleConsistency() { + for (Locale locale : Locale.getAvailableLocales()) { + Locale.setDefault(locale); + for (String mimeType : TextParser.supportedMimeTypes()) { + assertNull(locale + " " + mimeType, TextParser.supportsMime(mimeType.toUpperCase(Locale.ROOT))); + } + } + } + +}