http://forum.yacy-websuche.de/viewtopic.php?f=8&t=5861#p33098pull/93/head
parent
7f63fc50f3
commit
a4465c97d6
Binary file not shown.
@ -1,7 +0,0 @@
|
||||
This package is part of WebCAT (http://webcat.sourceforge.net/)
|
||||
|
||||
WebCAT was developed at the XLDB group of the Department of Informatics of the Faculty of Sciences of the University of Lisbon in Portugal.
|
||||
|
||||
WebCAT was written by Bruno Martins.
|
||||
|
||||
WebCAT is released under the BSD License. (http://www.opensource.org/licenses/bsd-license.php)
|
@ -1,113 +0,0 @@
|
||||
//swfParser.java
|
||||
//------------------------
|
||||
//part of YaCy
|
||||
//(C) by Michael Peter Christen; mc@yacy.net
|
||||
//first published on http://www.anomic.de
|
||||
//Frankfurt, Germany, 2005
|
||||
//
|
||||
//this file is contributed by Marc Nause
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
//This program is free software; you can redistribute it and/or modify
|
||||
//it under the terms of the GNU General Public License as published by
|
||||
//the Free Software Foundation; either version 2 of the License, or
|
||||
//(at your option) any later version.
|
||||
//
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU General Public License for more details.
|
||||
//
|
||||
//You should have received a copy of the GNU General Public License
|
||||
//along with this program; if not, write to the Free Software
|
||||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.document.parser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.document.AbstractParser;
|
||||
import net.yacy.document.Document;
|
||||
import net.yacy.document.Parser;
|
||||
import net.yacy.document.VocabularyScraper;
|
||||
import net.yacy.document.parser.html.ContentScraper;
|
||||
import pt.tumba.parser.swf.SWF2HTML;
|
||||
|
||||
public class swfParser extends AbstractParser implements Parser {
|
||||
|
||||
public swfParser() {
|
||||
super("Adobe Flash Parser");
|
||||
this.SUPPORTED_EXTENSIONS.add("swf");
|
||||
this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash");
|
||||
this.SUPPORTED_MIME_TYPES.add("application/x-shockwave-flash2-preview");
|
||||
this.SUPPORTED_MIME_TYPES.add("application/futuresplash");
|
||||
this.SUPPORTED_MIME_TYPES.add("image/vnd.rn-realflash");
|
||||
}
|
||||
|
||||
/*
|
||||
* parses the source documents and returns a plasmaParserDocument containing
|
||||
* all extracted information about the parsed document
|
||||
*/
|
||||
@Override
|
||||
public Document[] parse(
|
||||
final DigestURL location,
|
||||
final String mimeType,
|
||||
final String charset,
|
||||
final VocabularyScraper scraper,
|
||||
final int timezoneOffset,
|
||||
final InputStream source)
|
||||
throws Parser.Failure, InterruptedException
|
||||
{
|
||||
|
||||
try {
|
||||
final SWF2HTML swf2html = new SWF2HTML();
|
||||
String contents = "";
|
||||
try {
|
||||
contents = swf2html.convertSWFToHTML(source);
|
||||
scraperObject = htmlParser.parseToScraper(location, charset, scraper, timezoneOffset, contents, 100);
|
||||
} catch (final NegativeArraySizeException e) {
|
||||
throw new Parser.Failure(e.getMessage(), location);
|
||||
} catch (final IOException e) {
|
||||
throw new Parser.Failure(e.getMessage(), location);
|
||||
} catch (final Exception e) {
|
||||
throw new Parser.Failure(e.getMessage(), location);
|
||||
}
|
||||
|
||||
// As the result of parsing this function must return a plasmaParserDocument object
|
||||
ContentScraper htmlscraper = (ContentScraper) this.scraperObject; // shortcut to access ContentScraper methodes
|
||||
return new Document[]{new Document(
|
||||
location, // url of the source document
|
||||
mimeType, // the documents mime type
|
||||
StandardCharsets.UTF_8.name(), // charset of the document text
|
||||
this,
|
||||
htmlscraper.getContentLanguages(),
|
||||
htmlscraper.getKeywords(),
|
||||
htmlscraper.getTitles(),
|
||||
htmlscraper.getAuthor(),
|
||||
htmlscraper.getPublisher(),
|
||||
null, // sections
|
||||
htmlscraper.getDescriptions(),
|
||||
htmlscraper.getLon(), htmlscraper.getLat(),
|
||||
htmlscraper.getText(),
|
||||
htmlscraper.getAnchors(),
|
||||
htmlscraper.getRSS(),
|
||||
null, // images
|
||||
false,
|
||||
htmlscraper.getDate())};
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
||||
// if an unexpected error occures just log the error and raise a new Parser.Failure
|
||||
final String errorMsg = "Unable to parse the swf document '" + location + "':" + e.getMessage();
|
||||
//AbstractParser.log.logSevere(errorMsg);
|
||||
throw new Parser.Failure(errorMsg, location);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue