As requested, disable/remove the old SWF parser

http://forum.yacy-websuche.de/viewtopic.php?f=8&t=5861#p33098
9 years ago · a4465c97d6
parent 7f63fc50f3
commit a4465c97d6
7 changed files with 1 addition and 130 deletions
--- a/.classpath
+++ b/.classpath
@ -16,7 +16,6 @@
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 	<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
 	<classpathentry kind="lib" path="lib/J7Zip-modified.jar"/>
-	<classpathentry kind="lib" path="lib/webcat-swf-0.1.jar"/>
 	<classpathentry kind="lib" path="lib/commons-jxpath-1.3.jar"/>
 	<classpathentry kind="lib" path="lib/jsch-0.1.53.jar"/>
 	<classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/>
--- a/build.xml
+++ b/build.xml
@ -238,7 +238,6 @@
        <pathelement location="${lib}/solr-solrj-5.5.2.jar" />
    	<pathelement location="${lib}/spatial4j-0.5.jar" />
        <pathelement location="${lib}/stax2-api_3.1.4.jar" />        
-    	<pathelement location="${lib}/webcat-swf-0.1.jar" />
    	<pathelement location="${lib}/weupnp-0.1.4.jar" />
    	<pathelement location="${lib}/woodstox-core-asl-4.4.1.jar" />
    	<pathelement location="${lib}/xercesImpl.jar" />
--- a/lib/webcat-swf-0.1.jar
+++ b/lib/webcat-swf-0.1.jar
--- a/lib/webcat-swf.License
+++ b/lib/webcat-swf.License
@ -1,7 +0,0 @@
-This package is part of WebCAT (http://webcat.sourceforge.net/)
-
-WebCAT was developed at the XLDB group of the Department of Informatics of the Faculty of Sciences of the University of Lisbon in Portugal.
-
-WebCAT was written by Bruno Martins.
-
-WebCAT is released under the BSD License. (http://www.opensource.org/licenses/bsd-license.php)
--- a/pom.xml
+++ b/pom.xml
@ -591,12 +591,7 @@
            <artifactId>slf4j-jdk14</artifactId>
            <version>1.7.21</version>
            <type>jar</type>
-        </dependency>
-        <dependency>
-            <groupId>pt.tumba</groupId>
-            <artifactId>webcat-swf</artifactId>
-            <version>0.1</version>
-        </dependency>        
+        </dependency>       
        <dependency>
            <groupId>org.bitlet</groupId>
            <artifactId>weupnp</artifactId>
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@ -56,7 +56,6 @@ import net.yacy.document.parser.rssParser;
 import net.yacy.document.parser.rtfParser;
 import net.yacy.document.parser.sevenzipParser;
 import net.yacy.document.parser.sidAudioParser;
-import net.yacy.document.parser.swfParser;
 import net.yacy.document.parser.tarParser;
 import net.yacy.document.parser.torrentParser;
 import net.yacy.document.parser.vcfParser;
@ -106,7 +105,6 @@ public final class TextParser {
        initParser(new sevenzipParser());
        initParser(new sidAudioParser());
        initParser(new svgParser());
-        initParser(new swfParser());
        initParser(new tarParser());
        initParser(new torrentParser());
        initParser(new vcfParser());
--- a/source/net/yacy/document/parser/swfParser.java
+++ b/source/net/yacy/document/parser/swfParser.java
@ -1,113 +0,0 @@
-//swfParser.java
-//------------------------
-//part of YaCy
-//(C) by Michael Peter Christen; mc@yacy.net
-//first published on http://www.anomic.de
-//Frankfurt, Germany, 2005
-//
-//this file is contributed by Marc Nause
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-//This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
-//the Free Software Foundation; either version 2 of the License, or
-//(at your option) any later version.
-//
-//This program is distributed in the hope that it will be useful,
-//but WITHOUT ANY WARRANTY; without even the implied warranty of
-//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//GNU General Public License for more details.
-//
-//You should have received a copy of the GNU General Public License
-//along with this program; if not, write to the Free Software
-//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-package net.yacy.document.parser;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-
-import net.yacy.cora.document.id.DigestURL;
-import net.yacy.document.AbstractParser;
-import net.yacy.document.Document;
-import net.yacy.document.Parser;
-import net.yacy.document.VocabularyScraper;
-import net.yacy.document.parser.html.ContentScraper;
-import pt.tumba.parser.swf.SWF2HTML;
-
-public class swfParser extends AbstractParser implements Parser {
-
-    public swfParser() {
-        super("Adobe Flash Parser");
-        this.SUPPORTED_EXTENSIONS.add("swf");
-        this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash");
-        this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash2-preview");
-        this.SUPPORTED_MIME_TYPES.add("application/futuresplash");
-        this.SUPPORTED_MIME_TYPES.add("image/vnd.rn-realflash");
-    }
-
-    /*
-     * parses the source documents and returns a plasmaParserDocument containing
-     * all extracted information about the parsed document
-     */
-    @Override
-    public Document[] parse(
-            final DigestURL location,
-            final String mimeType,
-            final String charset,
-            final VocabularyScraper scraper, 
-            final int timezoneOffset,
-            final InputStream source)
-            throws Parser.Failure, InterruptedException
-    {
-
-        try {
-            final SWF2HTML swf2html = new SWF2HTML();
-            String contents = "";
-            try {
-                contents = swf2html.convertSWFToHTML(source);
-                scraperObject = htmlParser.parseToScraper(location, charset, scraper, timezoneOffset, contents, 100);
-            } catch (final NegativeArraySizeException e) {
-                throw new Parser.Failure(e.getMessage(), location);
-            } catch (final IOException e) {
-                throw new Parser.Failure(e.getMessage(), location);
-            } catch (final Exception e) {
-                throw new Parser.Failure(e.getMessage(), location);
-            }
-
-            // As the result of parsing this function must return a plasmaParserDocument object
-            ContentScraper htmlscraper = (ContentScraper) this.scraperObject; // shortcut to access ContentScraper methodes
-            return new Document[]{new Document(
-                location, // url of the source document
-                mimeType, // the documents mime type
-                StandardCharsets.UTF_8.name(), // charset of the document text
-                this,
-                htmlscraper.getContentLanguages(),
-                htmlscraper.getKeywords(),
-                htmlscraper.getTitles(),
-                htmlscraper.getAuthor(),
-                htmlscraper.getPublisher(),
-                null, // sections
-                htmlscraper.getDescriptions(),
-                htmlscraper.getLon(), htmlscraper.getLat(),
-                htmlscraper.getText(),
-                htmlscraper.getAnchors(),
-                htmlscraper.getRSS(),
-                null, // images
-                false,
-                htmlscraper.getDate())};
-        } catch (final Exception e) {
-            if (e instanceof InterruptedException) throw (InterruptedException) e;
-
-            // if an unexpected error occures just log the error and raise a new Parser.Failure
-            final String errorMsg = "Unable to parse the swf document '" + location + "':" + e.getMessage();
-            //AbstractParser.log.logSevere(errorMsg);
-            throw new Parser.Failure(errorMsg, location);
-        }
-    }
-
-}