diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 7e9d35f4d..38bbabeae 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -359,7 +360,7 @@ public final class TextParser { * @throws Parser.Failure */ private static Set parsers(final MultiProtocolURI url, String mimeType1) throws Parser.Failure { - final Set idioms = new HashSet(2); + final Set idioms = new LinkedHashSet(2); // LinkedHashSet keeps insertion order (genericParser should be last) // check extension String ext = MultiProtocolURI.getFileExtension(url.getFileName()); @@ -383,7 +384,7 @@ public final class TextParser { final String mimeType2 = ext2mime.get(ext); if (mimeType2 != null && (idiom = mime2parser.get(mimeType2)) != null && !idioms.contains(idiom)) idioms.addAll(idiom); - // always add the generic parser + // always add the generic parser (make sure it is the last in iteration order) idioms.add(genericIdiom); //if (idioms.isEmpty()) throw new Parser.Failure("no parser found for extension '" + ext + "' and mime type '" + mimeType1 + "'", url); diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java index efa9d423d..46f17796b 100644 --- a/source/net/yacy/document/parser/augment/AugmentParser.java +++ b/source/net/yacy/document/parser/augment/AugmentParser.java @@ -27,11 +27,10 @@ public class AugmentParser extends AbstractParser implements Parser { ConcurrentLog.info("AugmentedParser", "augmented parser was initialized"); this.SUPPORTED_EXTENSIONS.add("html"); + this.SUPPORTED_EXTENSIONS.add("htm"); this.SUPPORTED_EXTENSIONS.add("php"); this.SUPPORTED_MIME_TYPES.add("text/html"); 
this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml"); - this.SUPPORTED_EXTENSIONS.add("html"); - this.SUPPORTED_EXTENSIONS.add("htm"); } @Override diff --git a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java index 8adde28be..45978122d 100644 --- a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java +++ b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java @@ -36,11 +36,10 @@ public class RDFaParser extends AbstractParser implements Parser { this.hp = new htmlParser(); this.SUPPORTED_EXTENSIONS.add("html"); + this.SUPPORTED_EXTENSIONS.add("htm"); this.SUPPORTED_EXTENSIONS.add("php"); this.SUPPORTED_MIME_TYPES.add("text/html"); this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml"); - this.SUPPORTED_EXTENSIONS.add("html"); - this.SUPPORTED_EXTENSIONS.add("htm"); } @Override