- small adjustment to make sure genericParser is tried last

-- for some documents (e.g. .ps and .rdf files, among others) the genericParser grabbed the document instead of the
      specific parser that was available, because the set of candidate parsers was unordered and the first parser to try was picked arbitrarily
- remove redundant file extension registration
pull/1/head
reger 11 years ago
parent 3e901dcb06
commit aa1a1f1d2c

@ -28,6 +28,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@ -359,7 +360,7 @@ public final class TextParser {
* @throws Parser.Failure
*/
private static Set<Parser> parsers(final MultiProtocolURI url, String mimeType1) throws Parser.Failure {
final Set<Parser> idioms = new HashSet<Parser>(2);
final Set<Parser> idioms = new LinkedHashSet<Parser>(2); // LinkedSet to maintain order (genericParser should be last)
// check extension
String ext = MultiProtocolURI.getFileExtension(url.getFileName());
@ -383,7 +384,7 @@ public final class TextParser {
final String mimeType2 = ext2mime.get(ext);
if (mimeType2 != null && (idiom = mime2parser.get(mimeType2)) != null && !idioms.contains(idiom)) idioms.addAll(idiom);
// always add the generic parser
// always add the generic parser (added last so that it comes last in the set's insertion order)
idioms.add(genericIdiom);
//if (idioms.isEmpty()) throw new Parser.Failure("no parser found for extension '" + ext + "' and mime type '" + mimeType1 + "'", url);

@ -27,11 +27,10 @@ public class AugmentParser extends AbstractParser implements Parser {
ConcurrentLog.info("AugmentedParser", "augmented parser was initialized");
this.SUPPORTED_EXTENSIONS.add("html");
this.SUPPORTED_EXTENSIONS.add("htm");
this.SUPPORTED_EXTENSIONS.add("php");
this.SUPPORTED_MIME_TYPES.add("text/html");
this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml");
this.SUPPORTED_EXTENSIONS.add("html");
this.SUPPORTED_EXTENSIONS.add("htm");
}
@Override

@ -36,11 +36,10 @@ public class RDFaParser extends AbstractParser implements Parser {
this.hp = new htmlParser();
this.SUPPORTED_EXTENSIONS.add("html");
this.SUPPORTED_EXTENSIONS.add("htm");
this.SUPPORTED_EXTENSIONS.add("php");
this.SUPPORTED_MIME_TYPES.add("text/html");
this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml");
this.SUPPORTED_EXTENSIONS.add("html");
this.SUPPORTED_EXTENSIONS.add("htm");
}
@Override

Loading…
Cancel
Save