* http-server: submit charset from HTML meta tags

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5314 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 16 years ago
parent 69e695bd4b
commit 4b4ce75396

@ -76,6 +76,7 @@ import java.util.Properties;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterInputStream;
import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants; import de.anomic.plasma.plasmaSwitchboardConstants;
@ -88,6 +89,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties; import de.anomic.server.servletProperties;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyURL;
import de.anomic.ymage.ymageMatrix; import de.anomic.ymage.ymageMatrix;
public final class httpdFileHandler { public final class httpdFileHandler {
@ -564,7 +566,7 @@ public final class httpdFileHandler {
// we have found a file that can be written to the client // we have found a file that can be written to the client
// if this file uses templates, then we use the template // if this file uses templates, then we use the template
// re-write - method to create a result // re-write - method to create a result
final String mimeType = mimeTable.getProperty(targetExt,"text/html"); String mimeType = mimeTable.getProperty(targetExt,"text/html");
final boolean zipContent = requestHeader.acceptGzip() && httpd.shallTransportZipped("." + conProp.getProperty("EXT","")); final boolean zipContent = requestHeader.acceptGzip() && httpd.shallTransportZipped("." + conProp.getProperty("EXT",""));
if (path.endsWith("html") || if (path.endsWith("html") ||
path.endsWith("xml") || path.endsWith("xml") ||
@ -693,6 +695,19 @@ public final class httpdFileHandler {
fis = new BufferedInputStream(new FileInputStream(targetFile)); fis = new BufferedInputStream(new FileInputStream(targetFile));
} }
// detect charset of html-files
if(path.endsWith("html") || path.endsWith("htm")) {
// save position
fis.mark(1000);
// scrape document to look up charset
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
final String charset = htmlFilter.detectCharset();
// reset position
fis.reset();
if(charset != null)
mimeType = mimeType + "; charset="+charset;
}
// write the array to the client // write the array to the client
// we can do that either in standard mode (whole thing completely) or in chunked mode // we can do that either in standard mode (whole thing completely) or in chunked mode
// since yacy clients do not understand chunked mode (yet), we use this only for communication with the administrator // since yacy clients do not understand chunked mode (yet), we use this only for communication with the administrator

Loading…
Cancel
Save