* remove all encoding-stuff from proxy

Encoding is handled by the parsers or by the browser; the proxy only passes the content through.


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5410 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 16 years ago
parent 73c8a0839c
commit 2d2ce24011

@ -80,7 +80,6 @@ import java.util.zip.GZIPOutputStream;
import de.anomic.crawler.HTTPLoader; import de.anomic.crawler.HTTPLoader;
import de.anomic.htmlFilter.htmlFilterContentTransformer; import de.anomic.htmlFilter.htmlFilterContentTransformer;
import de.anomic.htmlFilter.htmlFilterTransformer; import de.anomic.htmlFilter.htmlFilterTransformer;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.index.indexDocumentMetadata; import de.anomic.index.indexDocumentMetadata;
import de.anomic.index.indexReferenceBlacklist; import de.anomic.index.indexReferenceBlacklist;
import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaHTCache;
@ -227,28 +226,6 @@ public final class httpdProxyHandler {
*/ */
private static final StringBuilder userAgentStr = new StringBuilder(); private static final StringBuilder userAgentStr = new StringBuilder();
/**
 * Top-level media types whose bodies are known to contain only binary data
 * (no readable text). Each entry is only the first part of the content-type
 * field, i.e. the part before the '/', without any subtype.
 * Entries must be lower case, because isBinary() looks up a lower-cased type here.
 */
private static final Set<String> binaryTypes = new HashSet<String>();
/**
 * Complete content-types whose bodies are known to contain only binary data
 * (no readable text). Each entry is a full "type/subtype" value of the
 * content-type header field, without parameters.
 * Entries must be lower case, because isBinary() looks up a lower-cased mime type here.
 */
private static final Set<String> binaryContent = new HashSet<String>();
// fill both Sets once when the class is initialized
static {
// all Strings must be lower case!!
// RFC 2045: "Matching of media type and subtype is ALWAYS case-insensitive."
// discrete types: every subtype of these top-level types is treated as binary
binaryTypes.add("image");
binaryTypes.add("audio");
binaryTypes.add("video");
// single content-type that is treated as binary although its top-level type is not listed above
binaryContent.add("application/octet-stream");
}
public static void handleOutgoingCookies(final httpRequestHeader requestHeader, final String targethost, final String clienthost) { public static void handleOutgoingCookies(final httpRequestHeader requestHeader, final String targethost, final String clienthost) {
/* /*
The syntax for the header is: The syntax for the header is:
@ -467,7 +444,6 @@ public final class httpdProxyHandler {
private static void fulfillRequestFromWeb(final Properties conProp, final yacyURL url,final String ext, final httpRequestHeader requestHeader, final httpResponseHeader cachedResponseHeader, final OutputStream respond) { private static void fulfillRequestFromWeb(final Properties conProp, final yacyURL url,final String ext, final httpRequestHeader requestHeader, final httpResponseHeader cachedResponseHeader, final OutputStream respond) {
final GZIPOutputStream gzippedOut = null; final GZIPOutputStream gzippedOut = null;
Writer textOutput = null;
JakartaCommonsHttpResponse res = null; JakartaCommonsHttpResponse res = null;
try { try {
@ -542,29 +518,6 @@ public final class httpdProxyHandler {
); );
plasmaHTCache.storeMetadata(responseHeader, cacheEntry); plasmaHTCache.storeMetadata(responseHeader, cacheEntry);
// handle file types and make (possibly transforming) output stream
final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
final boolean isBinary = isBinary(responseHeader);
if (isBinary) {
if (theLogger.isFine()) theLogger.logFine(reqID +" create direct passthrough for URL " + url + ", extension '" + ext + "', mime-type '" + responseHeader.mime() + "'");
} else {
// handle text stuff (encoding and so on)
final Charset charSet = responseHeader.getCharSet();
if (
(!transformer.isIdentityTransformer()) &&
(plasmaParser.supportedHTMLContent(url,responseHeader.mime()))
) {
// make a transformer
if (theLogger.isFine()) theLogger.logFine(reqID +" create transformer for URL " + url);
//hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0));
textOutput = new htmlFilterWriter(outStream,charSet, null, transformer, (ext.length() == 0));
} else {
// simply pass through without parsing
if (theLogger.isFine()) theLogger.logFine(reqID +" create text passthrough for URL " + url + ", extension '" + ext + "', mime-type '" + responseHeader.mime() + "'");
textOutput = new OutputStreamWriter(outStream, charSet);
}
}
// handle incoming cookies // handle incoming cookies
handleIncomingCookies(responseHeader, host, ip); handleIncomingCookies(responseHeader, host, ip);
@ -586,6 +539,8 @@ public final class httpdProxyHandler {
if(hasBody(res.getStatusCode())) { if(hasBody(res.getStatusCode())) {
final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
final String storeError = cacheEntry.shallStoreCacheForProxy(); final String storeError = cacheEntry.shallStoreCacheForProxy();
final boolean storeHTCache = cacheEntry.profile().storeHTCache(); final boolean storeHTCache = cacheEntry.profile().storeHTCache();
final boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url(),cacheEntry.getMimeType()); final boolean isSupportedContent = plasmaParser.supportedContent(plasmaParser.PARSER_MODE_PROXY,cacheEntry.url(),cacheEntry.getMimeType());
@ -604,12 +559,9 @@ public final class httpdProxyHandler {
// we don't write actually into a file, only to RAM, and schedule writing the file. // we don't write actually into a file, only to RAM, and schedule writing the file.
int l = res.getResponseHeader().size(); int l = res.getResponseHeader().size();
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l); final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l);
if(isBinary) {
final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {outStream, byteStream}); final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {outStream, byteStream});
serverFileUtils.copy(res.getDataAsStream(), toClientAndMemory); serverFileUtils.copy(res.getDataAsStream(), toClientAndMemory);
} else {
writeTextContent(res, new BufferedWriter(textOutput), byteStream);
}
// cached bytes // cached bytes
byte[] cacheArray; byte[] cacheArray;
if(byteStream.size() > 0) { if(byteStream.size() > 0) {
@ -619,8 +571,6 @@ public final class httpdProxyHandler {
} }
if (theLogger.isFine()) theLogger.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length))); if (theLogger.isFine()) theLogger.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
if (textOutput instanceof htmlFilterWriter) ((htmlFilterWriter) textOutput).close();
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
// totally fresh file // totally fresh file
//cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
@ -647,14 +597,8 @@ public final class httpdProxyHandler {
" StoreHTCache=" + storeHTCache + " StoreHTCache=" + storeHTCache +
" SupportetContent=" + isSupportedContent); " SupportetContent=" + isSupportedContent);
if(isBinary) {
// directly pass bytes to client
serverFileUtils.copy(res.getDataAsStream(), outStream); serverFileUtils.copy(res.getDataAsStream(), outStream);
} else {
// read data with specified encoding and send it as character stream
writeTextContent(res, new BufferedWriter(textOutput));
}
if (textOutput instanceof htmlFilterWriter) ((htmlFilterWriter) textOutput).close();
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} }
@ -712,7 +656,6 @@ public final class httpdProxyHandler {
final httpChunkedOutputStream chunkedOut = null; final httpChunkedOutputStream chunkedOut = null;
final GZIPOutputStream gzippedOut = null; final GZIPOutputStream gzippedOut = null;
Writer textOutput = null;
// we respond on the request by using the cache, the cache is fresh // we respond on the request by using the cache, the cache is fresh
try { try {
@ -757,22 +700,12 @@ public final class httpdProxyHandler {
httpd.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader); httpd.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader);
//respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative' //respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
// determine the content charset
final Charset charSet = cachedResponseHeader.getCharSet();
// make a transformer
final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond); final OutputStream outStream = (gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond);
if (( !transformer.isIdentityTransformer()) &&
(ext == null || !plasmaParser.supportedHTMLFileExtContains(url)) &&
(plasmaParser.HTMLParsableMimeTypesContains(cachedResponseHeader.mime()))) {
textOutput = new htmlFilterWriter(outStream, charSet, null, transformer, (ext == null || ext.length() == 0));
}
// send also the complete body now from the cache // send also the complete body now from the cache
// simply read the file and transfer to out socket // simply read the file and transfer to out socket
serverFileUtils.copy(cacheEntry, outStream); serverFileUtils.copy(cacheEntry, outStream);
if (textOutput != null) textOutput.close();
if (gzippedOut != null) gzippedOut.finish(); if (gzippedOut != null) gzippedOut.finish();
if (chunkedOut != null) chunkedOut.finish(); if (chunkedOut != null) chunkedOut.finish();
} }
@ -792,68 +725,6 @@ public final class httpdProxyHandler {
return; return;
} }
/**
 * Determines whether the body of a response is binary data (as opposed to text),
 * judging only by the mime type reported by the response header.
 *
 * The body counts as binary when the mime type without parameters is contained in
 * binaryContent, or when its top-level media type (the part before the '/') is
 * contained in binaryTypes.
 *
 * @param responseHeader header of the response whose body is to be classified
 * @return true if the mime type denotes known binary content, false otherwise
 */
private static boolean isBinary(httpResponseHeader responseHeader) {
    // RFC 2045: media types are matched case-insensitively. Lower-case with a fixed
    // locale: with the default locale (e.g. Turkish) "IMAGE" would become "ımage"
    // and never match the lower-case entries of the Sets.
    String mime = responseHeader.mime().toLowerCase(java.util.Locale.ENGLISH);
    final int paramStart = mime.indexOf(';');
    if (paramStart >= 0) {
        // cut off parameters
        mime = mime.substring(0, paramStart);
    }
    // whitespace around the media type (e.g. "text/html ; charset=...") is not part of it
    mime = mime.trim();
    // mime and the contents of the Sets must be lower case!
    if (binaryContent.contains(mime)) {
        return true;
    }
    // without a '/' the whole value is taken as the top-level type
    final int slash = mime.indexOf('/');
    final String type = (slash >= 0) ? mime.substring(0, slash) : mime;
    return binaryTypes.contains(type);
}
/**
 * Reads the body of res and hands it to serverFileUtils.copyToWriter together with
 * the charset announced in the response header, so that it is written to output.
 * Does nothing if the response provides no data stream. The stream of res is
 * closed in every case, also when copying fails.
 *
 * @param res response whose body is read
 * @param output writer which receives the body
 * @throws IOException
 */
public static void writeTextContent(final JakartaCommonsHttpResponse res, final BufferedWriter output) throws IOException {
    try {
        final InputStream body = res.getDataAsStream();
        if (body != null) {
            final Charset encoding = res.getResponseHeader().getCharSet();
            final BufferedInputStream bufferedBody = new BufferedInputStream(body);
            serverFileUtils.copyToWriter(bufferedBody, output, encoding);
        }
    } finally {
        // always release the response stream
        res.closeStream();
    }
}
/**
 * Reads the body of res and hands it to serverFileUtils.copyToWriters together with
 * the charset announced in the response header, so that it is written to output and,
 * in parallel, to byteStream through a writer using that same charset.
 * Does nothing if the response provides no data stream. The stream of res is
 * closed in every case, also when copying fails.
 *
 * @param res response whose body is read
 * @param output writer which receives the body
 * @param byteStream stream which receives the body as bytes; must not be null
 * @throws IOException
 */
public static void writeTextContent(final JakartaCommonsHttpResponse res, final BufferedWriter output, final OutputStream byteStream) throws IOException {
    assert byteStream != null;
    try {
        final InputStream body = res.getDataAsStream();
        if (body != null) {
            final Charset encoding = res.getResponseHeader().getCharSet();
            final BufferedInputStream bufferedBody = new BufferedInputStream(body);
            // second sink: re-encodes the characters with the same charset into byteStream
            final BufferedWriter byteSink = new BufferedWriter(new OutputStreamWriter(byteStream, encoding));
            serverFileUtils.copyToWriters(bufferedBody, output, byteSink, encoding);
        }
    } finally {
        // always release the response stream
        res.closeStream();
    }
}
public static void doHead(final Properties conProp, final httpRequestHeader requestHeader, OutputStream respond) { public static void doHead(final Properties conProp, final httpRequestHeader requestHeader, OutputStream respond) {
JakartaCommonsHttpResponse res = null; JakartaCommonsHttpResponse res = null;
@ -1084,11 +955,7 @@ public final class httpdProxyHandler {
if (chunked != null) chunked.finish(); if (chunked != null) chunked.finish();
*/ */
final OutputStream outStream = (chunked != null) ? chunked : countedRespond; final OutputStream outStream = (chunked != null) ? chunked : countedRespond;
if(isBinary(responseHeader)) {
serverFileUtils.copy(res.getDataAsStream(), outStream); serverFileUtils.copy(res.getDataAsStream(), outStream);
} else {
writeTextContent(res, new BufferedWriter(new OutputStreamWriter(outStream, responseHeader.getCharSet())));
}
if (chunked != null) { if (chunked != null) {
chunked.finish(); chunked.finish();

@ -354,6 +354,10 @@ public final class yacyVersion implements Comparator<yacyVersion>, Comparable<ya
} }
try { try {
serverFileUtils.copyToStream(new BufferedInputStream(res.getDataAsStream()), new BufferedOutputStream(new FileOutputStream(download))); serverFileUtils.copyToStream(new BufferedInputStream(res.getDataAsStream()), new BufferedOutputStream(new FileOutputStream(download)));
} catch(IOException ie) {
// Saving file failed, abort download
res.abort();
throw ie;
} finally { } finally {
res.closeStream(); res.closeStream();
} }

Loading…
Cancel
Save