@ -45,7 +45,7 @@ public class RSSReader extends DefaultHandler {
// class variables
private RSSMessage item ;
private final StringBuilder buffer ;
private boolean parsingChannel , parsingI mage, parsingI tem;
private boolean parsingChannel , parsingI tem;
private final RSSFeed theChannel ;
private Type type ;
@ -56,38 +56,36 @@ public class RSSReader extends DefaultHandler {
this . buffer = new StringBuilder ( 300 ) ;
this . item = null ;
this . parsingChannel = false ;
this . parsingImage = false ;
this . parsingItem = false ;
this . type = Type . none ;
}
private static final ThreadLocal < SAXParser > tlSax = new ThreadLocal < SAXParser > ( ) ;
private static SAXParser getParser ( ) throws SAXException {
SAXParser parser = tlSax . get ( ) ;
if ( parser = = null ) {
try {
parser = SAXParserFactory . newInstance ( ) . newSAXParser ( ) ;
} catch ( final ParserConfigurationException e ) {
throw new SAXException ( e . getMessage ( ) , e ) ;
}
tlSax . set ( parser ) ;
}
return parser ;
SAXParser parser = tlSax . get ( ) ;
if ( parser = = null ) {
try {
parser = SAXParserFactory . newInstance ( ) . newSAXParser ( ) ;
} catch ( final ParserConfigurationException e ) {
throw new SAXException ( e . getMessage ( ) , e ) ;
}
tlSax . set ( parser ) ;
}
return parser ;
}
public RSSReader ( final int maxsize , InputStream stream , final Type type ) throws IOException {
public RSSReader ( final int maxsize , InputStream stream ) throws IOException {
this ( maxsize ) ;
this . type = type ;
if ( ! ( stream instanceof ByteArrayInputStream ) & & ! ( stream instanceof BufferedInputStream ) ) stream = new BufferedInputStream ( stream ) ;
try {
final SAXParser saxParser = getParser ( ) ;
// do not look at external dtd - see: http://www.ibm.com/developerworks/xml/library/x-tipcfsx/index.html
saxParser . getXMLReader ( ) . setEntityResolver ( new EntityResolver ( ) {
@Override
public InputSource resolveEntity ( final String arg0 , final String arg1 )
throws SAXException , IOException {
return new InputSource ( new StringReader ( "" ) ) ;
}
@Override
public InputSource resolveEntity ( final String arg0 , final String arg1 )
throws SAXException , IOException {
return new InputSource ( new StringReader ( "" ) ) ;
}
} ) ;
saxParser . parse ( stream , this ) ;
} catch ( final SAXException e ) {
@ -106,15 +104,7 @@ public class RSSReader extends DefaultHandler {
throw new IOException ( "response=null" ) ;
}
if ( a . length < 100 ) {
throw new IOException ( "response=" + UTF8 . String ( a ) ) ;
}
if ( ! equals ( a , UTF8 . getBytes ( "<?xml" ) ) & & ! equals ( a , UTF8 . getBytes ( "<rss" ) ) ) {
throw new IOException ( "response does not contain valid xml" ) ;
}
final Type type = findOutType ( a ) ;
if ( type = = Type . none ) {
throw new IOException ( "response incomplete" ) ;
throw new IOException ( "response to short=" + UTF8 . String ( a ) ) ;
}
// make input stream
@ -123,7 +113,7 @@ public class RSSReader extends DefaultHandler {
// parse stream
RSSReader reader = null ;
try {
reader = new RSSReader ( maxsize , bais , type );
reader = new RSSReader ( maxsize , bais );
} catch ( final Exception e ) {
throw new IOException ( "parse exception: " + e . getMessage ( ) , e ) ;
}
@ -131,42 +121,6 @@ public class RSSReader extends DefaultHandler {
return reader ;
}
/ * *
* Tries to find out the type of feed by stepping through its data
* starting in the end and looking at the last XML tag . Just grabbing
* the last few characters of the data does not work since some
* people add quite long comments at the end of their feeds .
* @param a contains the feed
* @return type of feed
* /
private static Type findOutType ( final byte [ ] a ) {
String end ;
int i = 1 ;
do {
/ * In first iteration grab the last 80 characters , after that
* move towards the start of the data and take some more ( 90 )
* to have an overlap in order to not miss anything if the tag
* is on the border of two 80 character blocks .
* /
end = UTF8 . String ( a , a . length - ( i * 80 ) , ( 80 + ( ( i > 1 ) ? 10 : 0 ) ) ) ;
i + + ;
} while ( ! end . contains ( "</" ) ) ;
Type type = Type . none ;
if ( end . indexOf ( "rss" , 0 ) > 0 ) type = Type . rss ;
if ( end . indexOf ( "feed" , 0 ) > 0 ) type = Type . atom ;
if ( end . indexOf ( "rdf" , 0 ) > 0 ) type = Type . rdf ;
return type ;
}
private final static boolean equals ( final byte [ ] buffer , final byte [ ] pattern ) {
// compares two byte arrays: true, if pattern appears completely at offset position
if ( buffer . length < pattern . length ) return false ;
for ( int i = 0 ; i < pattern . length ; i + + ) if ( buffer [ i ] ! = pattern [ i ] ) return false ;
return true ;
}
@Override
public void startElement ( final String uri , final String name , final String tag , final Attributes atts ) throws SAXException {
if ( "channel" . equals ( tag ) ) {
@ -185,16 +139,17 @@ public class RSSReader extends DefaultHandler {
}
this . item = new RSSMessage ( ) ;
this . parsingItem = true ;
} else if ( this . parsingItem & & this . type = = Type . atom & & "link" . equals ( tag ) & & ( atts . getValue ( "type" ) = = null | | this . item . getLink ( ) . length ( ) = = 0 | | atts . getValue ( "type" ) . startsWith ( "text" ) | | atts . getValue ( "type" ) . equals ( "application/xhtml+xml" ) ) ) {
} else if ( this . parsingItem & & this . type = = Type . atom & & "link" . equals ( tag ) & & ( atts . getValue ( "rel" ) = = null | | atts . getValue ( "rel" ) . equals ( "alternate" ) ) ) {
// atom link handling (rss link is handled in endElement)
final String url = atts . getValue ( "href" ) ;
if ( url ! = null & & url . length ( ) > 0 ) this . item . setValue ( Token . link , url ) ;
} else if ( " image". equals ( tag ) | | ( this . pa rsingItem & & thi s. type = = Type . atom & & "link ". equals ( tag ) & & ( atts . getValue ( "type" ) = = null | | atts . getValue ( "type" ) . startsWith ( "image" ) ) ) ) {
this . parsingImage = true ;
} else if ( " rss". equals ( tag ) ) {
this . type = Type . rss ;
}
}
@Override
public void endElement ( final String uri , final String name , final String tag ) {
public void endElement ( final String uri , final String name , final String tag ) throws SAXException {
if ( tag = = null ) return ;
if ( "channel" . equals ( tag ) | | "feed" . equals ( tag ) ) {
if ( this . parsingChannel ) this . theChannel . setChannel ( this . item ) ;
@ -202,12 +157,6 @@ public class RSSReader extends DefaultHandler {
} else if ( "item" . equals ( tag ) | | "entry" . equals ( tag ) ) {
this . theChannel . addMessage ( this . item ) ;
this . parsingItem = false ;
} else if ( "image" . equals ( tag ) ) {
this . parsingImage = false ;
} else if ( ( this . parsingImage ) & & ( this . parsingChannel ) ) {
final String value = this . buffer . toString ( ) . trim ( ) ;
this . buffer . setLength ( 0 ) ;
if ( "url" . equals ( tag ) ) this . theChannel . setImage ( value ) ;
} else if ( this . parsingItem ) {
final String value = this . buffer . toString ( ) . trim ( ) ;
this . buffer . setLength ( 0 ) ;
@ -216,6 +165,9 @@ public class RSSReader extends DefaultHandler {
final String value = this . buffer . toString ( ) . trim ( ) ;
this . buffer . setLength ( 0 ) ;
if ( RSSMessage . tags . contains ( tag ) ) this . item . setValue ( RSSMessage . valueOfNick ( tag ) , value ) ;
} else if ( this . type = = Type . none ) {
// give up if we don't known the feed format
throw new SAXException ( "response incomplete or unknown feed format" ) ;
}
}