*) added SID file (Commodore 64) sound file parser

*) minor changes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7403 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 14 years ago
parent ee3cef91e8
commit 11ea966f9e

@ -94,6 +94,7 @@ rss = application/rss+xml
rtf = application/rtf
sh = text/plain
shtml = text/html
sid = audio/prs.sid
stream = audio/x-qt-stream
svg = image/svg+xml
swf = application/x-shockwave-flash

@ -56,7 +56,7 @@ public class Classification {
}
final String apps = "7z,ace,arc,arj,apk,asf,asx,bat,bin,bkf,bz2,cab,com,css,dcm,deb,dll,dmg,exe,gho,ghs,gz,hqx,img,iso,jar,lha,rar,sh,sit,sitx,tar,tbz,tgz,tib,torrent,vbs,war,zip";
final String audio = "aac,aif,aiff,flac,m4a,m4p,mid,mp2,mp3,oga,ogg,ram,wav,wma";
final String audio = "aac,aif,aiff,flac,m4a,m4p,mid,mp2,mp3,oga,ogg,ram,sid,wav,wma";
final String video = "3g2,3gp,3gp2,3gpp,3gpp2,3ivx,asf,asx,avi,div,divx,dv,dvx,env,f4v,flv,hdmov,m1v,m4v,m-jpeg,moov,mov,movie,mp2v,mp4,mpe,mpeg,mpg,mpg4,mv4,ogm,ogv,qt,rm,rv,vid,swf,wmv";
final String image = "ai,bmp,cdr,cmx,emf,eps,gif,img,jpeg,jpg,mng,pct,pdd,pdn,pict,png,psb,psd,psp,tif,tiff,wmf";

@ -84,7 +84,7 @@ public class Document {
private int inboundLinks, outboundLinks; // counters for inbound and outbound links, are counted after calling notifyWebStructure
private Set<String> languages;
private boolean indexingDenied;
public Document(final MultiProtocolURI location, final String mimeType, final String charset, final Set<String> languages,
final String[] keywords, final String title, final String author, final String publisher,
final String[] sections, final String abstrct,

@ -73,16 +73,18 @@ public interface Parser {
* @param o
* @return
*/
@Override
public boolean equals(Object o);
/**
* the hash code of a parser
* @return the hash code of the parser name string
*/
@Override
public int hashCode();
/**
* a parser waring
* a parser warning
* thrown as an exception
*/
public class Failure extends Exception {

@ -58,6 +58,7 @@ import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser;
import net.yacy.document.parser.images.genericImageParser;
import net.yacy.document.parser.mmParser;
import net.yacy.document.parser.sidAudioParser;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@ -89,6 +90,7 @@ public final class TextParser {
initParser(new rssParser());
initParser(new rtfParser());
initParser(new sevenzipParser());
initParser(new sidAudioParser());
initParser(new swfParser());
initParser(new tarParser());
initParser(new torrentParser());

@ -0,0 +1,133 @@
/**
* sidAudioParser
* Copyright 2010 by Marc Nause, marc.nause@gmx.de, Braunschweig, Germany
* First released 28.12.2010 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.parser;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
// This parser follows the newer parser idiom that returns multiple documents as its result set.
/**
* Parser for Commodore 64 SID audio files.
* @see <a href="http://cpansearch.perl.org/src/LALA/Audio-SID-3.11/SID_file_format.txt">
* SID file format description</a>
* @author low012
*/
public class sidAudioParser extends AbstractParser implements Parser {

    /** Number of leading bytes that must be readable before the header is parsed. */
    private static final int MIN_HEADER_LENGTH = 128;

    /** Width in bytes of each fixed-size text field (name, author, copyright). */
    private static final int TEXT_FIELD_LENGTH = 32;

    /** Offsets of the text fields inside the header. */
    private static final int NAME_OFFSET = 16;
    private static final int AUTHOR_OFFSET = 48;
    private static final int COPYRIGHT_OFFSET = 80;

    /** Charset used to decode the text fields; looked up once instead of per call. */
    private static final Charset HEADER_CHARSET = Charset.forName("ISO-8859-1");

    /**
     * Registers the file extension and the MIME types this parser is responsible for.
     */
    public sidAudioParser() {
        super("Commodore 64 SID audio file Parser");
        SUPPORTED_EXTENSIONS.add("sid");
        SUPPORTED_MIME_TYPES.add("audio/prs.sid");
        SUPPORTED_MIME_TYPES.add("audio/psid");
        SUPPORTED_MIME_TYPES.add("audio/x-psid");
        SUPPORTED_MIME_TYPES.add("audio/sidtune");
        SUPPORTED_MIME_TYPES.add("audio/x-sidtune");
    }

    /**
     * Reads the header of a SID file and turns the name, author and copyright
     * fields into the title, author and publisher of a single document.
     *
     * @param location URL of the file, used for the document and for error reporting
     * @param mimeType MIME type that is passed on to the resulting document
     * @param charset ignored; the header is always decoded as ISO-8859-1
     * @param source stream positioned at the start of the file; it is not closed here
     * @return an array holding exactly one document
     * @throws Parser.Failure if the stream cannot be read, ends before the header
     *         is complete, or announces a version other than 1 or 2
     * @throws InterruptedException declared for the interface, never thrown here
     */
    public Document[] parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final InputStream source)
            throws Parser.Failure, InterruptedException {
        final byte[] b = new byte[MIN_HEADER_LENGTH];
        final int len;
        try {
            // available() is only an estimate and a single read() may return
            // less than requested, so read in a loop until the header is complete.
            len = readHeader(source, b);
        } catch (final IOException ex) {
            throw new Parser.Failure("Unable to read SID file header.", location, ex);
        }

        if (len < MIN_HEADER_LENGTH) {
            throw new Parser.Failure("Unable to parse SID file, file does seems to be incomplete (len = " + len + ").", location);
        }

        // The version is a big-endian 16 bit word; mask the bytes because
        // Java bytes are signed and would otherwise sign-extend.
        final int version = ((b[4] & 0xFF) << 8) | (b[5] & 0xFF);
        if (version != 1 && version != 2) {
            throw new Parser.Failure("Unable to parse SID file, unexpected version: " + version, location);
        }

        // Both supported versions store the text fields at the same offsets.
        final Map<String, String> header = parseHeader(b);

        return new Document[]{new Document(location,
                mimeType,
                "UTF-8",
                null,
                null,
                header.get("name"),
                header.get("author"),
                header.get("publisher"),
                null,
                null,
                null,
                null,
                null,
                null,
                false)};
    }

    /**
     * Fills the buffer from the stream, tolerating short reads.
     *
     * @param source stream to read from
     * @param buffer destination; reading stops when it is full
     * @return number of bytes stored, smaller than the buffer only at end of stream
     * @throws IOException if the underlying stream fails
     */
    private static int readHeader(final InputStream source, final byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            final int n = source.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break; // end of stream
            }
            total += n;
        }
        return total;
    }

    /**
     * Extracts the text fields from a SID header.
     *
     * @param header must contain at least the header of the SID file.
     * @return values parsed from the input data, stored under the keys
     *         "name", "author" and "publisher"
     */
    private Map<String, String> parseHeader(final byte[] header) {
        final Map<String, String> ret = new HashMap<String, String>();
        ret.put("name", readTextField(header, NAME_OFFSET));
        ret.put("author", readTextField(header, AUTHOR_OFFSET));
        ret.put("publisher", readTextField(header, COPYRIGHT_OFFSET));
        return ret;
    }

    /**
     * Decodes one fixed-width text field. The field ends at the first NUL byte
     * or, if there is none, after {@link #TEXT_FIELD_LENGTH} bytes.
     *
     * @param header the raw header bytes
     * @param offset index of the first byte of the field
     * @return the decoded text with surrounding whitespace removed
     */
    private static String readTextField(final byte[] header, final int offset) {
        int length = 0;
        while (length < TEXT_FIELD_LENGTH && header[offset + length] != 0) {
            length++;
        }
        return new String(header, offset, length, HEADER_CHARSET).trim();
    }
}
Loading…
Cancel
Save