* Files distributed by YaCy are UTF-8; files from the repository use the system default charset

* fixes http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1564#p11092
  and http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1550


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5345 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 16 years ago
parent 8c96bc2ac1
commit d49ffcd818

@@ -696,17 +696,24 @@ public final class httpdFileHandler {
fis = new BufferedInputStream(new FileInputStream(targetFile)); fis = new BufferedInputStream(new FileInputStream(targetFile));
} }
// detect charset of html-files if(mimeType.startsWith("text")) {
if(path.endsWith("html") || path.endsWith("htm")) { // every text-file distributed by yacy is UTF-8
// save position if(!path.startsWith("/repository")) {
fis.mark(1000); mimeType = mimeType + "; charset=UTF-8";
// scrape document to look up charset } else {
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false); // detect charset of html-files
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset()); if((path.endsWith("html") || path.endsWith("htm"))) {
// reset position // save position
fis.reset(); fis.mark(1000);
if(charset != null) // scrape document to look up charset
mimeType = mimeType + "; charset="+charset; final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset());
if(charset != null)
mimeType = mimeType + "; charset="+charset;
// reset position
fis.reset();
}
}
} }
// write the array to the client // write the array to the client

Loading…
Cancel
Save