*) updated Apache POI library to be able to parse Visio files

*) updated PPT and XLS parsers to use new Apache POI library
*) added new Visio (VSD) parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6145 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 16 years ago
parent a10c8022d1
commit 457b6c0d6d

@ -14,8 +14,8 @@
       <classPath>..\..\yacy.jar</classPath>        <classPath>..\..\yacy.jar</classPath>
       <classPath>..\..\lib\commons-collections.jar</classPath>        <classPath>..\..\lib\commons-collections.jar</classPath>
       <classPath>..\..\lib\commons-pool.jar</classPath>        <classPath>..\..\lib\commons-pool.jar</classPath>
       <classPath>..\..\libx\poi-3.0-alpha2-20060616.jar</classPath>        <classPath>..\..\libx\poi-3.2-FINAL-20081019.jar</classPath>
       <classPath>..\..\libx\poi-scratchpad-3.0-20060616.jar</classPath>        <classPath>..\..\libx\poi-scratchpad-3.2-FINAL-20081019.jar</classPath>
       <classPath>..\..\libx\bzip2.jar</classPath>        <classPath>..\..\libx\bzip2.jar</classPath>
       <classPath>..\..\libx\commons-codec-1.3.jar</classPath>        <classPath>..\..\libx\commons-codec-1.3.jar</classPath>
       <classPath>..\..\libx\commons-discovery.jar</classPath>        <classPath>..\..\libx\commons-discovery.jar</classPath>

@ -252,18 +252,18 @@ minimumGlobalDelta = 500
# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser # parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser
# parseableMime: specifies mime-types that can be indexed but not on the fly # parseableMime: specifies mime-types that can be indexed but not on the fly
parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml parseableMimeTypes.HTML=application/xhtml+xml,text/html,text/plain,text/sgml
parseableMimeTypes=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml parseableMimeTypes=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet 
manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
parseableMimeTypes.CRAWLER=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml parseableMimeTypes.CRAWLER=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet 
manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
parseableMimeTypes.PROXY=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml parseableMimeTypes.PROXY=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet 
manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
parseableMimeTypes.ICAP=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml parseableMimeTypes.ICAP=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet 
manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
parseableMimeTypes.URLREDIRECTOR=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-vnd.oasis.opendocument.text,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml parseableMimeTypes.URLREDIRECTOR=application/atom+xml,application/bzip2,application/excel,application/gzip,application/java-archive,application/msexcel,application/mspowerpoint,application/msword,application/octet-stream,application/pdf,application/postscript,application/powerpoint,application/rdf+xml,application/rss+xml,application/rtf,application/tar,application/vcard,application/visio,application/visio.drawing,application/vnd.ms-excel,application/vnd.ms-powerpoint,application/vnd.oasis.opendocument.text,application/vnd.visio,application/vsd,application/x-7z-compressed,application/x-bz2,application/x-bzip2,application/x-compress,application/x-compressed,application/x-excel,application/x-gzip,application/x-msexcel,application/x-redhat packet 
manager,application/x-redhat-package-manager,application/x-rpm,application/x-shockwave-flash,application/x-shockwave-flash2-preview,application/x-tar,application/x-visio,application/x-vnd.oasis.opendocument.text,application/x-vsd,application/x-xml,application/x-zip,application/x-zip-compressed,application/xml,application/zip,image/x-vsd,text/postscript,text/rss,text/rtf,text/x-vcard,text/xml,zz-application/zz-winassoc-vsd
parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp parseableMimeTypes.IMAGE=image/gif,image/jpeg,image/png,image/tiff,image/vnd.wap.wbmp,image/x-icon,image/bmp
# media extension string # media extension string
# a comma-separated list of extensions that denote media file formats # a comma-separated list of extensions that denote media file formats
# this is important to recognize <a href> - tags as not-html reference # this is important to recognize <a href> - tags as not-html reference
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_ # These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bmp,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,war,wav,wmv,xcf,xls,zip mediaExt=7z,ace,aif,aiff,arj,asf,asx,avi,bin,bmp,bz2,css,db,dcm,deb,doc,dll,dmg,exe,gif,gz,hqx,ico,img,iso,jar,jpe,jpg,jpeg,lx,lxl,m4v,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,scr,sit,so,swf,sxc,sxd,sxi,sxw,tar,tbz,tgz,torrent,vsd,war,wav,wmv,xcf,xls,zip
parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
# Promotion Strings # Promotion Strings

Binary file not shown.

Binary file not shown.

@ -18,8 +18,8 @@
<pathelement location="${build}" /> <pathelement location="${build}" />
<!-- libraries needed by this parser --> <!-- libraries needed by this parser -->
<pathelement location="${libx}/poi-3.0-alpha2-20060616.jar" /> <pathelement location="${libx}/poi-3.2-FINAL-20081019.jar" />
<pathelement location="${libx}/poi-scratchpad-3.0-alpha2-20060616.jar" /> <pathelement location="${libx}/poi-scratchpad-3.2-FINAL-20081019.jar" />
</classpath> </classpath>
</javac> </javac>
</target> </target>

@ -4,20 +4,24 @@
//(C) by Michael Peter Christen; mc@yacy.net //(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de //first published on http://www.anomic.de
//Frankfurt, Germany, 2005 //Frankfurt, Germany, 2005
//
//this file is contributed by Tim Riemann //this file is contributed by Tim Riemann
//last major change: 10.09.2006 //
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$
//
//
//This program is free software; you can redistribute it and/or modify //This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by //it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or //the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version. //(at your option) any later version.
//
//This program is distributed in the hope that it will be useful, //This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of //but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details. //GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License //You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software //along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -54,8 +58,8 @@ public class pptParser extends AbstractParser implements Parser {
* @see Parser#getLibxDependences() * @see Parser#getLibxDependences()
*/ */
private static final String[] LIBX_DEPENDENCIES = new String[] { private static final String[] LIBX_DEPENDENCIES = new String[] {
"poi-3.0-alpha2-20060616.jar", "poi-3.2-FINAL-20081019.jar",
"poi-scratchpad-3.0-alpha2-20060616.jar" "poi-scratchpad-3.2-FINAL-20081019.jar"
}; };
public pptParser(){ public pptParser(){
@ -121,6 +125,7 @@ public class pptParser extends AbstractParser implements Parser {
return SUPPORTED_MIME_TYPES; return SUPPORTED_MIME_TYPES;
} }
@Override
public void reset(){ public void reset(){
//nothing to do //nothing to do
super.reset(); super.reset();

@ -0,0 +1,66 @@
<?xml version="1.0"?>
<project name="YACY - vsdParser" default="dist">
<description>A class to parse vsd files</description>
<!-- the parsers short name -->
<property name="parserShortName" value="vsd"/>
<!-- parser version number -->
<property name="parserVersion" value="0.1"/>
<property name="parserLongName" value="yacyContentParser_${parserShortName}"/>
<property name="parserArchive" location="${release}/${parserLongName}_${parserVersion}.tgz"/>
<!-- compile the sources of this parser -->
<target name="compile">
<javac srcdir="${src}/de/anomic/plasma/parser/${parserShortName}" destdir="${build}" source="${javacSource}" target="${javacTarget}" debug="true" debuglevel="lines,vars,source">
<classpath>
<pathelement location="${build}" />
<!-- libraries needed by this parser -->
<pathelement location="${libx}/poi-3.2-FINAL-20081019.jar" />
<pathelement location="${libx}/poi-scratchpad-3.2-FINAL-20081019.jar" />
</classpath>
</javac>
</target>
<!-- add all parts of this parser into a single zip file -->
<target name="zip" depends="compile">
<tar destfile="${parserArchive}" compression="gzip">
<!-- needed libraries and license files of these libs -->
<tarfileset dir="${libx}"
includes="poi-*.*"
prefix="${releaseFileParentDir}/libx/"
dirmode="755" mode="644"/>
<!-- source files of this parser -->
<tarfileset dir="${src}/de/anomic/plasma/parser/${parserShortName}"
prefix="${releaseFileParentDir}/source/de/anomic/plasma/parser/${parserShortName}"
dirmode="755" mode="644"/>
<!-- class files of this parser -->
<tarfileset dir="${build}/de/anomic/plasma/parser/${parserShortName}"
prefix="${releaseFileParentDir}/classes/de/anomic/plasma/parser/${parserShortName}"
dirmode="755" mode="644"/>
</tar>
</target>
<!-- just copy all parts of this parser into the release directory -->
<target name="copy" depends="compile">
<!-- copy needed libs -->
<copy todir="${release}/libx/">
<fileset dir="${libx}" includes="poi-*.*"/>
</copy>
<!-- copy source code files -->
<copy todir="${release}/source/de/anomic/plasma/parser/${parserShortName}">
<fileset dir="${src}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/>
</copy>
<!-- copy compiled classes -->
<copy todir="${release}/classes/de/anomic/plasma/parser/${parserShortName}">
<fileset dir="${build}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/>
</copy>
</target>
<target name="dist" depends="compile,zip" description="Compile and zip the parser"/>
</project>

@ -0,0 +1,160 @@
//vsdParser.java
//------------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2005
//
//this file is contributed by Marc Nause
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.plasma.parser.vsd;
import java.io.InputStream;
import java.util.Hashtable;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
import de.anomic.plasma.parser.ParserException;
import de.anomic.yacy.yacyURL;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpsf.SummaryInformation;
/**
 * Parser for Microsoft Visio (vsd) documents.
 *
 * Text and summary information are extracted with the
 * {@link VisioTextExtractor} of the Apache POI library and wrapped into a
 * {@link plasmaParserDocument}.
 */
public class vsdParser extends AbstractParser implements Parser {

    /**
     * a list of mime types that are supported by this parser class
     * @see #getSupportedMimeTypes()
     */
    public static final Hashtable<String, String> SUPPORTED_MIME_TYPES = new Hashtable<String, String>();
    static {
        SUPPORTED_MIME_TYPES.put("application/visio","vsd");
        SUPPORTED_MIME_TYPES.put("application/x-visio","vsd");
        SUPPORTED_MIME_TYPES.put("application/vnd.visio","vsd");
        SUPPORTED_MIME_TYPES.put("application/visio.drawing","vsd");
        SUPPORTED_MIME_TYPES.put("application/vsd","vsd");
        SUPPORTED_MIME_TYPES.put("application/x-vsd","vsd");
        SUPPORTED_MIME_TYPES.put("image/x-vsd","vsd");
        SUPPORTED_MIME_TYPES.put("zz-application/zz-winassoc-vsd","vsd");
    }

    /**
     * a list of library names that are needed by this parser
     * @see Parser#getLibxDependences()
     */
    private static final String[] LIBX_DEPENDENCIES = new String[] {
        "poi-3.2-FINAL-20081019.jar",
        "poi-scratchpad-3.2-FINAL-20081019.jar",
    };

    /** maximum number of characters of the document text that are used as abstract */
    private static final int ABSTRACT_LENGTH = 80;

    public vsdParser() {
        super(LIBX_DEPENDENCIES);
        this.parserName = "Microsoft Visio Parser";
        this.parserVersionNr = "0.1";
    }

    /**
     * returns a hashtable containing the mimetypes that are supported by this class
     */
    public Hashtable<String, String> getSupportedMimeTypes() {
        return SUPPORTED_MIME_TYPES;
    }

    /**
     * parses the source document and returns a plasmaParserDocument containing
     * all extracted information about the parsed document
     *
     * Text extraction is best effort: if the extractor fails, the failure is
     * logged and a document with empty text is returned. If the summary
     * information contains no title, the abstract (the first characters of the
     * text) is used as title.
     *
     * @param location url of the source document
     * @param mimeType mime type of the source document, passed on unchanged
     * @param charset not used by this parser; the extracted text is always delivered as UTF-8
     * @param source stream the Visio document is read from
     * @return the parsed document, never <code>null</code>
     * @throws ParserException if the document could not be built
     * @throws InterruptedException if an InterruptedException was raised while parsing
     */
    public plasmaParserDocument parse(final yacyURL location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        plasmaParserDocument theDoc = null;
        // set as soon as the catch block below takes over the error handling, so
        // that the finally block does not replace the exception thrown there
        boolean failureReported = false;

        try {
            String contents = "";
            SummaryInformation summary = null;
            try {
                final VisioTextExtractor extractor = new VisioTextExtractor(source);
                contents = extractor.getText();
                summary = extractor.getSummaryInformation();
            } catch (final Exception e) {
                // best effort: continue with whatever has been extracted so far,
                // but report the problem to the parser log instead of stderr
                this.theLogger.logSevere("Unable to extract text from the vsd document '" + location + "':" + e.getMessage());
            }

            String author = null;
            String[] keywords = null;
            String title = null;
            if (summary != null) {
                author = summary.getAuthor();
                keywords = splitKeywords(summary.getKeywords());
                title = summary.getTitle();
            }

            final String snippet = (contents.length() > ABSTRACT_LENGTH) ? contents.substring(0, ABSTRACT_LENGTH) : contents.trim();
            final String abstrct = snippet.
                    replace("\r\n", " ").
                    replace("\n", " ").
                    replace("\r", " ").
                    replace("\t", " ");

            if (title == null) {
                title = abstrct;
            }

            // As the result of parsing this function must return a plasmaParserDocument object
            theDoc = new plasmaParserDocument(
                    location,                   // url of the source document
                    mimeType,                   // the documents mime type
                    "UTF-8",                    // charset of the document text
                    null,                       // language
                    keywords,
                    title,
                    author,
                    null,                       // an array of section headlines
                    abstrct,                    // an abstract
                    contents.getBytes("UTF-8"), // the parsed document text
                    null,                       // a map of extracted anchors
                    null);                      // a treeset of image URLs
            return theDoc;
        } catch (final Exception e) {
            failureReported = true;
            if (e instanceof InterruptedException) throw (InterruptedException) e;

            // if an unexpected error occurs just log the error and raise a new ParserException
            final String errorMsg = "Unable to parse the vsd document '" + location + "':" + e.getMessage();
            this.theLogger.logSevere(errorMsg);
            throw new ParserException(errorMsg, location);
        } finally {
            // only reached with theDoc == null and no reported failure if something
            // that is not an Exception was thrown (e.g. an OutOfMemoryError)
            if (theDoc == null && !failureReported) {
                final String errorMsg = "Unable to parse the vsd document '" + location + "': possibly out of memory";
                this.theLogger.logSevere(errorMsg);
                throw new ParserException(errorMsg, location);
            }
        }
    }

    /**
     * splits a keyword string at runs of spaces, commas and semicolons
     *
     * @param rawKeywords the keyword string, may be <code>null</code>
     * @return the keywords without empty entries, or <code>null</code> if rawKeywords is <code>null</code>
     */
    private static String[] splitKeywords(final String rawKeywords) {
        if (rawKeywords == null) {
            return null;
        }
        // remove leading separators, otherwise split() would deliver an empty first element
        final String stripped = rawKeywords.replaceFirst("^[ ,;]+", "");
        if (stripped.length() == 0) {
            return new String[0];
        }
        return stripped.split("[ ,;]+");
    }

    @Override
    public void reset() {
        // this code is executed if the parser class is returned into the parser pool
        super.reset();
    }
}

@ -16,10 +16,10 @@
<javac srcdir="${src}/de/anomic/plasma/parser/${parserShortName}" destdir="${build}" source="${javacSource}" target="${javacTarget}" debug="true" debuglevel="lines,vars,source"> <javac srcdir="${src}/de/anomic/plasma/parser/${parserShortName}" destdir="${build}" source="${javacSource}" target="${javacTarget}" debug="true" debuglevel="lines,vars,source">
<classpath> <classpath>
<pathelement location="${build}" /> <pathelement location="${build}" />
<!-- libraries needed by this parser --> <!-- libraries needed by this parser -->
<pathelement location="${libx}/poi-3.0-alpha2-20060616.jar" /> <pathelement location="${libx}/poi-3.2-FINAL-20081019.jar" />
<pathelement location="${libx}/poi-scratchpad-3.0-alpha2-20060616.jar" /> <pathelement location="${libx}/poi-scratchpad-3.2-FINAL-20081019.jar" />
</classpath> </classpath>
</javac> </javac>
</target> </target>

@ -4,20 +4,23 @@
//(C) by Michael Peter Christen; mc@yacy.net //(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de //first published on http://www.anomic.de
//Frankfurt, Germany, 2005 //Frankfurt, Germany, 2005
//
//this file is contributed by Tim Riemann //this file is contributed by Tim Riemann
//last major change: 12.09.2006 //
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$
//
//This program is free software; you can redistribute it and/or modify //This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by //it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or //the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version. //(at your option) any later version.
//
//This program is distributed in the hope that it will be useful, //This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of //but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details. //GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License //You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software //along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -68,8 +71,8 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
* @see Parser#getLibxDependences() * @see Parser#getLibxDependences()
*/ */
private static final String[] LIBX_DEPENDENCIES = new String[] { private static final String[] LIBX_DEPENDENCIES = new String[] {
"poi-3.0-alpha2-20060616.jar", "poi-3.2-FINAL-20081019.jar",
"poi-scratchpad-3.0-alpha2-20060616.jar", "poi-scratchpad-3.2-FINAL-20081019.jar"
}; };
public xlsParser(){ public xlsParser(){
@ -143,6 +146,7 @@ public class xlsParser extends AbstractParser implements Parser, HSSFListener {
return SUPPORTED_MIME_TYPES; return SUPPORTED_MIME_TYPES;
} }
@Override
public void reset(){ public void reset(){
//nothing to do //nothing to do
super.reset(); super.reset();

Loading…
Cancel
Save