|
|
|
@ -30,14 +30,16 @@ import javax.xml.parsers.ParserConfigurationException;
|
|
|
|
|
import javax.xml.parsers.SAXParser;
|
|
|
|
|
import javax.xml.parsers.SAXParserFactory;
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.document.feed.RSSMessage.Token;
|
|
|
|
|
|
|
|
|
|
import org.xml.sax.Attributes;
|
|
|
|
|
import org.xml.sax.EntityResolver;
|
|
|
|
|
import org.xml.sax.InputSource;
|
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
|
import org.xml.sax.helpers.DefaultHandler;
|
|
|
|
|
|
|
|
|
|
import net.yacy.cora.document.feed.RSSMessage.Token;
|
|
|
|
|
import net.yacy.cora.util.StreamLimitException;
|
|
|
|
|
import net.yacy.cora.util.StrictLimitInputStream;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public class RSSReader extends DefaultHandler {
|
|
|
|
|
|
|
|
|
@ -47,6 +49,9 @@ public class RSSReader extends DefaultHandler {
|
|
|
|
|
private boolean parsingChannel, parsingItem;
|
|
|
|
|
private final RSSFeed theChannel;
|
|
|
|
|
private Type type;
|
|
|
|
|
|
|
|
|
|
/** When a parsing limit on instance construction has been exceeded */
|
|
|
|
|
private boolean maxBytesExceeded;
|
|
|
|
|
|
|
|
|
|
public enum Type { rss, atom, rdf, none }
|
|
|
|
|
|
|
|
|
@ -57,6 +62,7 @@ public class RSSReader extends DefaultHandler {
|
|
|
|
|
this.parsingChannel = false;
|
|
|
|
|
this.parsingItem = false;
|
|
|
|
|
this.type = Type.none;
|
|
|
|
|
this.maxBytesExceeded = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static final ThreadLocal<SAXParser> tlSax = new ThreadLocal<SAXParser>();
|
|
|
|
@ -91,6 +97,33 @@ public class RSSReader extends DefaultHandler {
|
|
|
|
|
throw new IOException (e.getMessage());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a reader and immediately parses the given stream with this instance as SAX handler.
 * The stream is wrapped in a {@link StrictLimitInputStream} created with <code>maxBytes</code>.
 * When a {@link StreamLimitException} is raised during parsing, no exception is thrown to the
 * caller: the instance is flagged instead, see {@link #isMaxBytesExceeded()}.
 *
 * @param maxsize passed unchanged to the {@link #RSSReader(int)} constructor
 * @param maxBytes limit handed to the {@link StrictLimitInputStream} wrapping the stream
 * @param stream the source to parse; wrapped in a {@link BufferedInputStream} unless it already
 *            is one or is a {@link ByteArrayInputStream}
 * @throws IOException when the stream can not be read, or when the SAX parser reports an error
 *             (the {@link SAXException} is then attached as cause)
 */
public RSSReader(final int maxsize, final long maxBytes, InputStream stream) throws IOException {
    this(maxsize);

    // raw streams get a buffer; in-memory and already buffered streams are used as they are
    if (!(stream instanceof ByteArrayInputStream) && !(stream instanceof BufferedInputStream)) {
        stream = new BufferedInputStream(stream);
    }

    final StrictLimitInputStream limitedSource = new StrictLimitInputStream(stream, maxBytes);

    try {
        final SAXParser saxParser = getParser();
        // do not look at external dtd - see: http://www.ibm.com/developerworks/xml/library/x-tipcfsx/index.html
        saxParser.getXMLReader().setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(final String publicId, final String systemId)
                    throws SAXException, IOException {
                // every external entity resolves to an empty document
                return new InputSource(new StringReader(""));
            }
        });
        saxParser.parse(limitedSource, this);
    } catch (final SAXException e) {
        // same message as before, but keep the parser exception as cause so its stack trace is not lost
        throw new IOException(e.getMessage(), e);
    } catch (final StreamLimitException e) {
        // exceeding the limit is not reported as an error: only remember that it happened
        this.maxBytesExceeded = true;
    }
}
|
|
|
|
|
|
|
|
|
|
public Type getType() {
|
|
|
|
|
return this.type;
|
|
|
|
@ -177,5 +210,12 @@ public class RSSReader extends DefaultHandler {
|
|
|
|
|
public RSSFeed getFeed() {
|
|
|
|
|
return this.theChannel;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @return true when a parsing limit on instance construction has been exceeded
|
|
|
|
|
*/
|
|
|
|
|
public boolean isMaxBytesExceeded() {
|
|
|
|
|
return this.maxBytesExceeded;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|