Made mime type and extension normalization locale independent.

Previously, an upper-cased mime type was incorrectly normalized when the
default locale was Turkish.
pull/127/head
luccioman 8 years ago
parent 319231a458
commit 286f3018bd

@ -29,6 +29,7 @@ import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@ -128,6 +129,15 @@ public final class TextParser {
for (Set<Parser> pl: mime2parser.values()) c.addAll(pl);
return c;
}
/**
 * Builds a snapshot of the mime types that currently have an entry in
 * the mime-to-parser registry. The returned set is a fresh copy, so the
 * caller may modify it without touching the registry itself.
 *
 * @return a new set holding every key of {@code mime2parser}
 */
public static Set<String> supportedMimeTypes() {
    // Copy constructor: same content as an empty set followed by addAll().
    return new HashSet<>(mime2parser.keySet());
}
private static void initParser(final Parser parser) {
String prototypeMime = null;
@ -145,7 +155,7 @@ public final class TextParser {
}
if (prototypeMime != null) for (String ext: parser.supportedExtensions()) {
ext = ext.toLowerCase();
ext = ext.toLowerCase(Locale.ROOT);
final String s = ext2mime.get(ext);
if (s != null && !s.equals(prototypeMime)) AbstractParser.log.info("Parser for extension '" + ext + "' was set to mime '" + s + "', overwriting with new mime '" + prototypeMime + "'.");
ext2mime.put(ext, prototypeMime);
@ -153,7 +163,7 @@ public final class TextParser {
for (String ext: parser.supportedExtensions()) {
// process the extensions
ext = ext.toLowerCase();
ext = ext.toLowerCase(Locale.ROOT);
LinkedHashSet<Parser> p0 = ext2parser.get(ext);
if (p0 == null) {
p0 = new LinkedHashSet<Parser>();
@ -518,12 +528,12 @@ public final class TextParser {
}
/**
 * Looks up the mime type registered for a file extension.
 * <p>
 * The extension is lower cased with {@link Locale#ROOT} so that the lookup
 * key does not depend on the JVM default locale (with a Turkish default
 * locale, 'I' would otherwise be lower cased to the dotless 'ı').
 *
 * @param ext a file extension; must not be null
 * @return the value stored in {@code ext2mime} for the lower cased extension
 *         (presumably null when no parser registered it - confirm against
 *         the map implementation)
 */
public static String mimeOf(final String ext) {
    return ext2mime.get(ext.toLowerCase(Locale.ROOT));
}
/**
 * Normalizes a mime type so that it can be used as a lookup key.
 * <p>
 * The value is lower cased, any parameter part starting at the first ';'
 * (for example {@code "; charset=UTF-8"}) is cut off, and surrounding
 * whitespace is trimmed.
 *
 * @param mimeType a raw mime type, possibly with parameters; may be null
 * @return the normalized mime type, or {@code "application/octet-stream"}
 *         when {@code mimeType} is null
 */
private static String normalizeMimeType(String mimeType) {
    if (mimeType == null) return "application/octet-stream";
    // Lower case with Locale.ROOT only: a default-locale toLowerCase() would
    // turn 'I' into the dotless 'ı' when the default locale is Turkish, and
    // a later locale-independent call could not undo that.
    mimeType = mimeType.toLowerCase(Locale.ROOT);
    final int pos = mimeType.indexOf(';');
    return ((pos < 0) ? mimeType.trim() : mimeType.substring(0, pos).trim());
}

@ -0,0 +1,55 @@
// TextParserTest.java
// ---------------------------
// Copyright 2017 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document;
import static org.junit.Assert.*;
import java.util.Locale;
import org.junit.Test;
/**
 * Unit tests for the {@link TextParser} class.
 *
 * @author luccioman
 *
 */
public class TextParserTest {

    /**
     * Test the TextParser.supportsMime() consistency with available locales.
     * Possible failure case: with the Turkish ("tr") language, 'I' lower cased
     * does not become 'i' but U+0131 (the Latin small letter dotless 'ı').
     * <p>
     * Each supported mime type is upper cased independently of the locale and
     * passed back to supportsMime(), which this test expects to return null
     * for every such value, whatever the default locale is.
     */
    @Test
    public void testSupportsMimeLocaleConsistency() {
        // The default locale is JVM-wide state: remember it so that it can be
        // restored and other tests in the same JVM are not affected.
        final Locale initialDefault = Locale.getDefault();
        try {
            for (final Locale locale : Locale.getAvailableLocales()) {
                Locale.setDefault(locale);
                for (final String mimeType : TextParser.supportedMimeTypes()) {
                    assertNull(locale + " " + mimeType, TextParser.supportsMime(mimeType.toUpperCase(Locale.ROOT)));
                }
            }
        } finally {
            Locale.setDefault(initialDefault);
        }
    }
}
Loading…
Cancel
Save