Recognize more HTML file extensions

pull/1/head
Michael Peter Christen 11 years ago
parent 8480641f2d
commit a1ee101079

@ -33,7 +33,9 @@ import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Set;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
@ -56,22 +58,21 @@ public class htmlParser extends AbstractParser implements Parser {
/**
 * Link limit constant.
 * NOTE(review): presumably caps the number of links handled per document;
 * the code that reads it is outside this excerpt, so confirm against its usage.
 */
private static final int maxLinks = 10000;

/**
 * File name extensions (without a leading dot) associated with this parser.
 * The static initializer copies these entries into {@code htmlExtensionsSet}.
 * Each extension is listed exactly once.
 */
public final static String[] htmlExtensions = new String[]{
    "htm","html","phtml","shtml","shtm","stm","xhtml","phtm",
    "tpl","php","php2","php3","php4","php5","cfm","asp","aspx","tex","txt"
};
/**
 * Hash-set copy of {@code htmlExtensions}, filled once when the class is
 * initialized. Duplicate array entries, if any, collapse to one set element.
 */
public final static Set<String> htmlExtensionsSet;
static {
    // Build the set in a local first, then publish it with a single assignment.
    final Set<String> extensions = new HashSet<>(htmlExtensions.length);
    for (int i = 0; i < htmlExtensions.length; i++) {
        extensions.add(htmlExtensions[i]);
    }
    htmlExtensionsSet = extensions;
}
public htmlParser() {
super("Streaming HTML Parser");
this.SUPPORTED_EXTENSIONS.add("htm");
this.SUPPORTED_EXTENSIONS.add("html");
this.SUPPORTED_EXTENSIONS.add("phtml");
this.SUPPORTED_EXTENSIONS.add("shtml");
this.SUPPORTED_EXTENSIONS.add("xhtml");
this.SUPPORTED_EXTENSIONS.add("php");
this.SUPPORTED_EXTENSIONS.add("php3");
this.SUPPORTED_EXTENSIONS.add("php4");
this.SUPPORTED_EXTENSIONS.add("php5");
this.SUPPORTED_EXTENSIONS.add("cfm");
this.SUPPORTED_EXTENSIONS.add("asp");
this.SUPPORTED_EXTENSIONS.add("aspx");
this.SUPPORTED_EXTENSIONS.add("tex");
this.SUPPORTED_EXTENSIONS.add("txt");
this.SUPPORTED_EXTENSIONS.addAll(htmlExtensionsSet);
this.SUPPORTED_MIME_TYPES.add("text/html");
this.SUPPORTED_MIME_TYPES.add("text/xhtml+xml");
this.SUPPORTED_MIME_TYPES.add("application/xhtml+xml");

Loading…
Cancel
Save