|
|
|
@ -547,7 +547,9 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|
|
|
|
|
|
|
|
|
// scrape document to look up charset
|
|
|
|
|
final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page),"UTF-8", new MultiProtocolURI("http://localhost"),null,false);
|
|
|
|
|
final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
|
|
|
|
|
String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
|
|
|
|
|
if(charset == null)
|
|
|
|
|
charset = Charset.defaultCharset().toString();
|
|
|
|
|
|
|
|
|
|
// scrape content
|
|
|
|
|
final ContentScraper scraper = new ContentScraper(new MultiProtocolURI("http://localhost"));
|
|
|
|
|