|
|
@ -83,14 +83,20 @@ public class htmlParser extends AbstractParser implements Parser {
|
|
|
|
// make a scraper
|
|
|
|
// make a scraper
|
|
|
|
final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
|
|
|
|
final ScraperInputStream htmlFilter = new ScraperInputStream(sourceStream,documentCharset,location,null,false);
|
|
|
|
String charset = null;
|
|
|
|
String charset = null;
|
|
|
|
try {
|
|
|
|
|
|
|
|
charset = htmlFilter.detectCharset();
|
|
|
|
if (documentCharset != null) {
|
|
|
|
} catch (IOException e1) {
|
|
|
|
charset = patchCharsetEncoding(documentCharset);
|
|
|
|
throw new Parser.Failure("Charset error:" + e1.getMessage(), location);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (charset == null) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
charset = htmlFilter.detectCharset();
|
|
|
|
|
|
|
|
} catch (IOException e1) {
|
|
|
|
|
|
|
|
throw new Parser.Failure("Charset error:" + e1.getMessage(), location);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (charset == null) {
|
|
|
|
if (charset == null) {
|
|
|
|
charset = documentCharset;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
charset = patchCharsetEncoding(charset);
|
|
|
|
charset = patchCharsetEncoding(charset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|