*) added FreeMind (http://freemind.sourceforge.net/) mindmap parser

*) minor changes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7397 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 14 years ago
parent 3d95981f7d
commit 936e976c23

@ -48,6 +48,7 @@ lzh = application/x-lzh
m4v = video/x-m4v
mf = application/octet-stream
mov = video/quicktime
mm = application/freemind
mp2 = audio/mpeg
mp3 = audio/mpeg
mp4 = video/mp4

@ -32,11 +32,11 @@ import net.yacy.kelondro.logging.Log;
public class Classification {
private static final HashSet<String> mediaExtSet = new HashSet<String>();
private static final HashSet<String> imageExtSet = new HashSet<String>();
private static final HashSet<String> audioExtSet = new HashSet<String>();
private static final HashSet<String> videoExtSet = new HashSet<String>();
private static final HashSet<String> appsExtSet = new HashSet<String>();
private static final Set<String> mediaExtSet = new HashSet<String>();
private static final Set<String> imageExtSet = new HashSet<String>();
private static final Set<String> audioExtSet = new HashSet<String>();
private static final Set<String> videoExtSet = new HashSet<String>();
private static final Set<String> appsExtSet = new HashSet<String>();
private static final Properties ext2mime = new Properties();

@ -57,6 +57,7 @@ import net.yacy.document.parser.vsdParser;
import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser;
import net.yacy.document.parser.images.genericImageParser;
import net.yacy.document.parser.mmParser;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@ -79,6 +80,7 @@ public final class TextParser {
initParser(new gzipParser());
initParser(new htmlParser());
initParser(new genericImageParser());
initParser(new mmParser());
initParser(new odtParser());
initParser(new ooxmlParser());
initParser(new pdfParser());
@ -263,7 +265,7 @@ public final class TextParser {
}
if (docs == null) {
if (failedParser.size() == 0) {
if (failedParser.isEmpty()) {
final String errorMsg = "Parsing content with file extension '" + location.getFileExtension() + "' and mimetype '" + mimeType + "' failed.";
//log.logWarning("Unable to parse '" + location + "'. " + errorMsg);
throw new Parser.Failure(errorMsg, location);

@ -0,0 +1,122 @@
/**
* mmParser
* Copyright 2010 by Marc Nause, marc.nause@gmx.de, Braunschweig, Germany
* First released 27.12.2010 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
/**
 * Parser for FreeMind (http://freemind.sourceforge.net/) mind map files.
 *
 * <p>The source is read as XML and the value of the <code>TEXT</code> attribute
 * of every element that carries one is collected in document order. Following
 * the parser idiom of returning the result as an array of documents, a
 * successful parse yields an array holding exactly one {@link Document}.
 */
public class mmParser extends AbstractParser implements Parser {

    /** Character set used to encode the extracted text and reported to the Document. */
    private static final String CONTENT_CHARSET = "UTF-8";

    /**
     * Registers the file extension <code>mm</code> and the mime type
     * <code>application/freemind</code> for this parser.
     */
    public mmParser() {
        super("FreeMind Parser");
        SUPPORTED_EXTENSIONS.add("mm");
        SUPPORTED_MIME_TYPES.add("application/freemind");
    }

    /**
     * Extracts all <code>TEXT</code> attribute values from the given mind map.
     *
     * <p>The document content is every collected value followed by
     * <code>". "</code>, encoded as UTF-8. The first collected value (or the
     * empty string if there is none) is handed to the Document in the argument
     * position the original code used for the root element text
     * (presumably the title - confirm against Document's constructor).
     *
     * @param location the URL the source was loaded from
     * @param mimeType the mime type reported for the source; passed through to the Document
     * @param charset  not used; character decoding is left to the XML parser
     * @param source   the stream holding the mind map XML
     * @return an array containing exactly one Document
     * @throws Parser.Failure if the XML parser cannot be set up or the source
     *         cannot be read or is not well-formed XML
     * @throws InterruptedException declared for interface compatibility; not thrown here
     */
    public Document[] parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final InputStream source)
            throws Parser.Failure, InterruptedException
    {
        final FreeMindHandler freeMindHandler = new FreeMindHandler();
        final byte[] content;

        // A failed parse must be reported to the caller instead of being
        // returned as a seemingly valid but empty document.
        try {
            newSecureSaxParser().parse(source, freeMindHandler);
            content = joinNodeText(freeMindHandler.getNodeText()).getBytes(CONTENT_CHARSET);
        } catch (ParserConfigurationException ex) {
            throw new Parser.Failure("Unable to set up XML parser for FreeMind file: " + ex.getMessage(), location);
        } catch (SAXException ex) {
            throw new Parser.Failure("Unable to parse FreeMind file: " + ex.getMessage(), location);
        } catch (IOException ex) {
            throw new Parser.Failure("Unable to read FreeMind file: " + ex.getMessage(), location);
        }

        final List<String> nodeTextList = freeMindHandler.getNodeText();
        final String rootElementText = nodeTextList.isEmpty() ? "" : nodeTextList.get(0);

        return new Document[]{new Document(
            location,
            mimeType,
            CONTENT_CHARSET,
            null,
            null,
            rootElementText,
            null,
            null,
            null,
            null,
            content,
            null,
            null,
            null,
            false)};
    }

    /**
     * Creates a SAX parser that does not resolve external entities.
     *
     * <p>The parsed files come from arbitrary remote locations, so external
     * general and parameter entities (the latter includes the external DTD
     * subset) are switched off to prevent XXE attacks.
     */
    private static SAXParser newSecureSaxParser() throws ParserConfigurationException, SAXException {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        return factory.newSAXParser();
    }

    /** Concatenates the given texts, appending ". " after each one. */
    private static String joinNodeText(final List<String> nodeTextList) {
        final StringBuilder sb = new StringBuilder();
        for (final String nodeText : nodeTextList) {
            sb.append(nodeText);
            sb.append(". ");
        }
        return sb.toString();
    }

    /**
     * SAX handler which records the <code>TEXT</code> attribute of every
     * element that has one, in the order the elements start.
     */
    private static class FreeMindHandler extends DefaultHandler {

        private final List<String> nodeText = new ArrayList<String>();

        @Override
        public void startElement(final String uri, final String localName,
                final String qName, final Attributes attributes) {
            final String textValue = attributes.getValue("TEXT");
            if (textValue != null) {
                nodeText.add(textValue);
            }
        }

        /** Returns the texts collected so far (the live list, not a copy). */
        protected List<String> getNodeText() {
            return nodeText;
        }
    }
}

@ -55,7 +55,10 @@ public class swfParser extends AbstractParser implements Parser {
* parses the source documents and returns a plasmaParserDocument containing
* all extracted information about the parsed document
*/
public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {
public Document[] parse(final MultiProtocolURI location, final String mimeType,
final String charset, final InputStream source)
throws Parser.Failure, InterruptedException
{
try {
final SWF2HTML swf2html = new SWF2HTML();

Loading…
Cancel
Save