// RSSReader.java // (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 16.07.2007 on http://yacy.net // // This is a part of YaCy, a peer-to-peer based web search engine // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.document.parser.xml; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import net.yacy.document.content.RSSMessage; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.ByteBuffer; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class RSSReader extends DefaultHandler { // class variables private RSSMessage item; private final StringBuilder buffer; private boolean parsingChannel, parsingImage, parsingItem; private final RSSFeed theChannel; public RSSReader() { theChannel = new RSSFeed(); buffer = new StringBuilder(); item = null; parsingChannel = false; parsingImage = false; parsingItem = false; } public RSSReader(final String path) throws IOException { this(); final SAXParserFactory factory = SAXParserFactory.newInstance(); try { final SAXParser saxParser = factory.newSAXParser(); saxParser.parse(path, this); } catch (SAXException e) { throw new IOException (e.getMessage()); } catch (ParserConfigurationException e) { throw new IOException (e.getMessage()); } } public RSSReader(final InputStream stream) throws IOException { this(); final SAXParserFactory factory = SAXParserFactory.newInstance(); try { final SAXParser saxParser = factory.newSAXParser(); saxParser.parse(stream, this); } catch (SAXException e) { throw new IOException (e.getMessage()); } catch (ParserConfigurationException e) { throw new IOException (e.getMessage()); } } public static RSSReader parse(final byte[] a) { // check integrity of array if ((a == null) || (a.length == 0)) { Log.logWarning("rssReader", "response=null"); return null; } if (a.length < 100) { Log.logWarning("rssReader", "response=" + new String(a)); return null; } if (!ByteBuffer.equals(a, "<?xml".getBytes())) { Log.logWarning("rssReader", "response does not contain valid xml"); return null; } final String end = new String(a, a.length - 10, 10); if (end.indexOf("rss") < 0) { Log.logWarning("rssReader", "response incomplete"); return null; } // make input stream final ByteArrayInputStream bais = new ByteArrayInputStream(a); // parse stream RSSReader reader = null; try { reader = new RSSReader(bais); } catch (final Exception e) { Log.logException(e); Log.logWarning("rssReader", "parse exception: " + e.getMessage(), e); return null; } try { bais.close(); } catch (final IOException e) {} return reader; } @Override public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException { if ("channel".equals(tag)) { item = new RSSMessage(); parsingChannel = true; } else if ("item".equals(tag)) { item = new RSSMessage(); parsingItem = true; } else if ("image".equals(tag)) { parsingImage = true; } } @Override public void endElement(final String uri, final String name, final String tag) { if (tag == null) return; if ("channel".equals(tag)) { parsingChannel = false; theChannel.setChannel(item); } else if ("item".equals(tag)) { theChannel.addMessage(item); parsingItem = false; } else if ("image".equals(tag)) { parsingImage = false; } else if ((parsingImage) && (parsingChannel)) { final String value = buffer.toString().trim(); buffer.setLength(0); if ("url".equals(tag)) theChannel.setImage(value); } else if (parsingItem) { final String value = buffer.toString().trim(); buffer.setLength(0); if (RSSMessage.tags.contains(tag)) item.setValue(tag, value); } else if (parsingChannel) { final String value = buffer.toString().trim(); buffer.setLength(0); if (RSSMessage.tags.contains(tag)) item.setValue(tag, value); } } @Override public void characters(final char ch[], final int start, final int length) { if (parsingItem || parsingChannel) { buffer.append(ch, start, length); } } public RSSFeed getFeed() { return theChannel; } }