optimize order of parsers to try

- start with a parser matching the remote supplied mime
pull/23/head
reger 10 years ago
parent 681889ae64
commit 8532565c7d

@ -375,9 +375,10 @@ public final class TextParser {
* because mime types returned by web severs are sometimes wrong, we also compute the mime type again
* from the extension that can be extracted from the url path. That means that there are 3 criteria
* that can be used to select a parser:
* - the given extension
* - the given mime type
* - the mime type computed from the extension
* - the given mime type (1.)
* - the extension of url (2.)
* - the mime type computed from the extension (3.)
* finally the generic parser is added as backup if all above fail
* @param url the given url
* @param mimeType the given mime type
* @return a list of Idiom parsers that may be appropriate for the given criteria
@ -386,26 +387,30 @@ public final class TextParser {
private static Set<Parser> parsers(final MultiProtocolURL url, String mimeType1) throws Parser.Failure {
final Set<Parser> idioms = new LinkedHashSet<Parser>(2); // LinkedSet to maintain order (genericParser should be last)
// check extension
String ext = MultiProtocolURL.getFileExtension(url.getFileName());
// check given mime type, place this first because this is the most likely to work and the best fit to the supplied mime
Set<Parser> idiom;
if (ext != null && ext.length() > 0) {
if (denyExtensionx.containsKey(ext)) throw new Parser.Failure("file extension '" + ext + "' is denied (1)", url);
idiom = ext2parser.get(ext);
if (idiom != null) idioms.addAll(idiom);
}
// check given mime type
if (mimeType1 != null) {
mimeType1 = normalizeMimeType(mimeType1);
if (denyMime.containsKey(mimeType1)) throw new Parser.Failure("mime type '" + mimeType1 + "' is denied (1)", url);
idiom = mime2parser.get(mimeType1);
if (idiom != null && !idioms.contains(idiom)) idioms.addAll(idiom);
if (idiom != null) idioms.addAll(idiom);
}
// check extension and add as backup (in case no, wrong or unknown/unsupported mime was suppied)
String ext = MultiProtocolURL.getFileExtension(url.getFileName());
if (ext != null && ext.length() > 0) {
if (denyExtensionx.containsKey(ext)) throw new Parser.Failure("file extension '" + ext + "' is denied (1)", url);
idiom = ext2parser.get(ext);
if (idiom != null && !idioms.containsAll(idiom)) { // use containsAll -> idiom is a Set of parser
idioms.addAll(idiom);
}
}
// check mime type computed from extension
final String mimeType2 = ext2mime.get(ext);
if (mimeType2 != null && (idiom = mime2parser.get(mimeType2)) != null && !idioms.contains(idiom)) idioms.addAll(idiom);
if (mimeType2 != null && (idiom = mime2parser.get(mimeType2)) != null && !idioms.containsAll(idiom)) { // use containsAll -> idiom is a Set of parser
idioms.addAll(idiom);
}
// always add the generic parser (make sure it is the last in access order)
idioms.add(genericIdiom);

Loading…
Cancel
Save