* detect the charset of the directory index page and use it, instead of a hard-coded UTF-8, when scraping its content

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5313 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 17 years ago
parent 340ecd919d
commit 69e695bd4b

@ -485,10 +485,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
final byte[] page = serverFileUtils.read(file); final byte[] page = serverFileUtils.read(file);
if (page == null) throw new IOException("no content in file " + file.toString()); if (page == null) throw new IOException("no content in file " + file.toString());
// scrape document to look up charset
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(new ByteArrayInputStream(page),"UTF-8",new yacyURL("http://localhost", null),null,false);
final String charset = htmlFilter.detectCharset();
// scrape content // scrape content
final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost", null)); final htmlFilterContentScraper scraper = new htmlFilterContentScraper(new yacyURL("http://localhost", null));
final Writer writer = new htmlFilterWriter(null, null, scraper, null, false); final Writer writer = new htmlFilterWriter(null, null, scraper, null, false);
serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName("UTF-8")); serverFileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName(charset));
return scraper; return scraper;
} }

Loading…
Cancel
Save