Enhanced the parsers with additional supported file extensions and MIME types

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6214 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent aee35bff6f
commit 43c8defd79

@ -18,7 +18,6 @@
<classpathentry exported="true" kind="lib" path="lib/commons-fileupload-1.2.1.jar"/>
<classpathentry exported="true" kind="lib" path="lib/servlet-api.jar"/>
<classpathentry exported="true" kind="lib" path="lib/commons-jxpath-1.1.jar"/>
<classpathentry exported="true" kind="lib" path="lib/sbbi-upnplib-1.0.4.jar"/>
<classpathentry kind="lib" path="lib/xerces.jar"/>
<classpathentry kind="lib" path="lib/bzip2.jar"/>
<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.7-bin.jar"/>
@ -37,5 +36,6 @@
<classpathentry kind="lib" path="lib/odf_utils_05_11_29.jar"/>
<classpathentry kind="lib" path="lib/jrpm-SNAPSHOT.jar"/>
<classpathentry kind="lib" path="lib/activation.jar"/>
<classpathentry kind="lib" path="lib/sbbi-upnplib-1.0.4.jar"/>
<classpathentry kind="output" path="gen"/>
</classpath>

@ -123,7 +123,7 @@ public final class HTTPLoader {
String supportError = Parser.supportsExtension(entry.url());
if (supportError != null) {
sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
throw new IOException("REJECTED WRONG EXTENSION TYPE " + entry.url().getFileExtension()+ " for URL " + entry.url().toString());
throw new IOException("REJECTED WRONG EXTENSION TYPE: " + supportError);
}
// check if url is in blacklist
@ -170,7 +170,7 @@ public final class HTTPLoader {
supportError = Parser.supports(entry.url(), res.getResponseHeader().mime());
if (supportError != null) {
sb.crawlQueues.errorURL.newEntry(entry, sb.peers.mySeed().hash, new Date(), 1, supportError);
throw new IOException("REJECTED WRONG MIME TYPE " + res.getResponseHeader().mime() + " for URL " + entry.url().toString());
throw new IOException("REJECTED WRONG MIME TYPE: " + supportError);
}
/*

@ -69,6 +69,7 @@ public class htmlParser extends AbstractParser implements Idiom {
SUPPORTED_MIME_TYPES.add("text/html");
SUPPORTED_MIME_TYPES.add("text/plain");
SUPPORTED_MIME_TYPES.add("text/sgml");
SUPPORTED_MIME_TYPES.add("text/csv");
}
public htmlParser() {

@ -68,10 +68,14 @@ public class odtParser extends AbstractParser implements Idiom {
SUPPORTED_EXTENSIONS.add("odt");
SUPPORTED_EXTENSIONS.add("ods");
SUPPORTED_EXTENSIONS.add("odp");
SUPPORTED_EXTENSIONS.add("sxw"); // Star Office Writer file format
SUPPORTED_EXTENSIONS.add("sxc"); // Star Office Calc file format
SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.text");
SUPPORTED_MIME_TYPES.add("application/x-vnd.oasis.opendocument.text");
SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.presentation");
SUPPORTED_MIME_TYPES.add("application/vnd.oasis.opendocument.spreadsheet");
SUPPORTED_MIME_TYPES.add("application/OOo-calc");
SUPPORTED_MIME_TYPES.add("application/OOo-writer");
}
public odtParser() {

@ -54,6 +54,7 @@ public class psParser extends AbstractParser implements Idiom {
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static {
SUPPORTED_EXTENSIONS.add("ps");
SUPPORTED_MIME_TYPES.add("application/postscript");
SUPPORTED_MIME_TYPES.add("application/ps");
SUPPORTED_MIME_TYPES.add("application/x-postscript");
SUPPORTED_MIME_TYPES.add("application/x-ps");

@ -50,10 +50,10 @@ public class rtfParser extends AbstractParser implements Idiom {
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static {
SUPPORTED_EXTENSIONS.add("rtf");
SUPPORTED_MIME_TYPES.add("application/rtf");
SUPPORTED_MIME_TYPES.add("text/rtf");
SUPPORTED_MIME_TYPES.add("application/x-rtf");
SUPPORTED_MIME_TYPES.add("text/richtext");
SUPPORTED_MIME_TYPES.add("application/rtf");
SUPPORTED_MIME_TYPES.add("application/x-rtf");
SUPPORTED_MIME_TYPES.add("application/x-soffice");
}

@ -49,6 +49,9 @@ public class vsdParser extends AbstractParser implements Idiom {
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static {
SUPPORTED_EXTENSIONS.add("vsd");
SUPPORTED_EXTENSIONS.add("vst");
SUPPORTED_EXTENSIONS.add("vdx");
SUPPORTED_EXTENSIONS.add("vtx");
SUPPORTED_MIME_TYPES.add("application/visio");
SUPPORTED_MIME_TYPES.add("application/x-visio");
SUPPORTED_MIME_TYPES.add("application/vnd.visio");

Loading…
Cancel
Save