From b4016ff324a6e781ab0b9b78801a2ec74c74d867 Mon Sep 17 00:00:00 2001 From: reger Date: Wed, 14 Aug 2013 21:12:10 +0200 Subject: [PATCH] - remove possible double initialization of rdfa parser - use ordered list to use preferred parser for mime/extension first (relates to html, rdfa, augment parser) - harmonize xhtml extension config for the 3 html base parsers --- source/net/yacy/document/TextParser.java | 22 ++++++++++--------- .../parser/augment/AugmentParser.java | 1 + .../document/parser/rdfa/impl/RDFaParser.java | 1 + 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java index 38bbabeae..23fd860c4 100644 --- a/source/net/yacy/document/TextParser.java +++ b/source/net/yacy/document/TextParser.java @@ -73,8 +73,9 @@ public final class TextParser { private static final Object v = new Object(); private static final Parser genericIdiom = new genericParser(); - private static final Map> mime2parser = new ConcurrentHashMap>(); - private static final Map> ext2parser = new ConcurrentHashMap>(); + //use LinkedHashSet for the parser collection so that (init) order determines the preferred parser for the same ext or mime + private static final Map> mime2parser = new ConcurrentHashMap>(); + private static final ConcurrentHashMap> ext2parser = new ConcurrentHashMap>(); private static final Map ext2mime = new ConcurrentHashMap(); private static final Map denyMime = new ConcurrentHashMap(); private static final Map denyExtensionx = new ConcurrentHashMap(); @@ -84,7 +85,11 @@ public final class TextParser { initParser(new csvParser()); initParser(new docParser()); initParser(new gzipParser()); - initParser(new htmlParser()); + // AugmentParser calls internally RDFaParser (therefore add before RDFa) + if (Switchboard.getSwitchboard().getConfigBool("parserAugmentation", true)) initParser(new AugmentParser()); + // RDFaParser calls internally htmlParser (therefore add before html) + if
(Switchboard.getSwitchboard().getConfigBool("parserAugmentation.RDFa", true)) initParser(new RDFaParser()); + initParser(new htmlParser()); // called within rdfa parser initParser(new genericImageParser()); initParser(new mmParser()); initParser(new odtParser()); @@ -103,12 +108,9 @@ public final class TextParser { initParser(new vsdParser()); initParser(new xlsParser()); initParser(new zipParser()); - initParser(new RDFaParser()); initParser(new rdfParser()); initParser(new audioTagParser()); - if (Switchboard.getSwitchboard().getConfigBool("parserAugmentation.RDFa", true)) initParser(new RDFaParser()); - if (Switchboard.getSwitchboard().getConfigBool("parserAugmentation", true)) initParser(new AugmentParser()); } public static Set parsers() { @@ -124,9 +126,9 @@ public final class TextParser { // process the mime types final String mimeType = normalizeMimeType(mime); if (prototypeMime == null) prototypeMime = mimeType; - Set p0 = mime2parser.get(mimeType); + LinkedHashSet p0 = mime2parser.get(mimeType); if (p0 == null) { - p0 = new HashSet(); + p0 = new LinkedHashSet(); mime2parser.put(mimeType, p0); } p0.add(parser); @@ -143,9 +145,9 @@ public final class TextParser { for (String ext: parser.supportedExtensions()) { // process the extensions ext = ext.toLowerCase(); - Set p0 = ext2parser.get(ext); + LinkedHashSet p0 = ext2parser.get(ext); if (p0 == null) { - p0 = new HashSet(); + p0 = new LinkedHashSet(); ext2parser.put(ext, p0); } p0.add(parser); diff --git a/source/net/yacy/document/parser/augment/AugmentParser.java b/source/net/yacy/document/parser/augment/AugmentParser.java index 9304c68c5..0d4bff25f 100644 --- a/source/net/yacy/document/parser/augment/AugmentParser.java +++ b/source/net/yacy/document/parser/augment/AugmentParser.java @@ -28,6 +28,7 @@ public class AugmentParser extends AbstractParser implements Parser { this.SUPPORTED_EXTENSIONS.add("html"); this.SUPPORTED_EXTENSIONS.add("htm"); + this.SUPPORTED_EXTENSIONS.add("xhtml"); 
this.SUPPORTED_EXTENSIONS.add("php"); this.SUPPORTED_MIME_TYPES.add("text/html"); this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml"); diff --git a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java index 4ef55ba76..ff011c1ea 100644 --- a/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java +++ b/source/net/yacy/document/parser/rdfa/impl/RDFaParser.java @@ -38,6 +38,7 @@ public class RDFaParser extends AbstractParser implements Parser { this.SUPPORTED_EXTENSIONS.add("html"); this.SUPPORTED_EXTENSIONS.add("htm"); + this.SUPPORTED_EXTENSIONS.add("xhtml"); this.SUPPORTED_EXTENSIONS.add("php"); this.SUPPORTED_MIME_TYPES.add("text/html"); this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml");