|
|
@ -214,13 +214,15 @@ public final class TextParser {
|
|
|
|
mimeType = normalizeMimeType(mimeType);
|
|
|
|
mimeType = normalizeMimeType(mimeType);
|
|
|
|
final String fileExt = location.getFileExtension();
|
|
|
|
final String fileExt = location.getFileExtension();
|
|
|
|
final String documentCharset = htmlParser.patchCharsetEncoding(charset);
|
|
|
|
final String documentCharset = htmlParser.patchCharsetEncoding(charset);
|
|
|
|
List<Idiom> idioms = idiomParser(location, mimeType);
|
|
|
|
List<Idiom> idioms = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
if (idioms.isEmpty()) {
|
|
|
|
idioms = idiomParser(location, mimeType);
|
|
|
|
final String errorMsg = "No parser available to parse extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "'";
|
|
|
|
} catch (ParserException e) {
|
|
|
|
log.logInfo("Unable to parse '" + location + "'. " + errorMsg);
|
|
|
|
final String errorMsg = "Parser Failure for extension '" + location.getFileExtension() + "' or mimetype '" + mimeType + "': " + e.getMessage();
|
|
|
|
|
|
|
|
log.logWarning(errorMsg);
|
|
|
|
throw new ParserException(errorMsg, location);
|
|
|
|
throw new ParserException(errorMsg, location);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assert !idioms.isEmpty();
|
|
|
|
|
|
|
|
|
|
|
|
if (log.isFine()) log.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
|
|
|
|
if (log.isFine()) log.logInfo("Parsing " + location + " with mimeType '" + mimeType + "' and file extension '" + fileExt + "'.");
|
|
|
|
|
|
|
|
|
|
|
@ -310,6 +312,9 @@ public final class TextParser {
|
|
|
|
idiom = mime2parser.get(mimeType2);
|
|
|
|
idiom = mime2parser.get(mimeType2);
|
|
|
|
if (idiom != null && !idioms.contains(idiom)) idioms.add(idiom);
|
|
|
|
if (idiom != null && !idioms.contains(idiom)) idioms.add(idiom);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// finally, check if we found any parser
|
|
|
|
|
|
|
|
if (idioms.isEmpty()) throw new ParserException("no parser found for extension '" + ext + "' and mime type '" + mimeType1 + "'", url);
|
|
|
|
|
|
|
|
|
|
|
|
return idioms;
|
|
|
|
return idioms;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|