augmentedProxy, which forwards every proxy request to a

rewrite engine to customize existing webpages. Originally implemented by
Florian Richter.

Conflicts:
	source/de/anomic/http/server/HTTPDProxyHandler.java
pull/1/head
cominch 13 years ago committed by Michael Peter Christen
parent 1626be7916
commit 9cbfc1a1c0

@ -10,7 +10,6 @@ import java.util.ArrayList;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.http.server.RobotsTxtConfig; import de.anomic.http.server.RobotsTxtConfig;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -75,6 +74,6 @@ public class robots {
htrootFiles.add(htroots[i]); htrootFiles.add(htroots[i]);
} }
} }
return new ArrayList[] { htrootFiles, htrootDirs }; return (ArrayList<String>[]) new Object[] { htrootFiles, htrootDirs };
} }
} }

@ -0,0 +1,72 @@
package de.anomic.http.server;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.interaction.AugmentHtmlStream;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
/**
 * Output stream that collects a complete response body in memory and, when it is
 * closed, passes the decoded text through {@link #process(StringBuffer)} before
 * writing the result to the wrapped stream.
 *
 * Nothing reaches the wrapped stream until {@link #close()} is called, because the
 * processing step needs the whole document at once.
 */
public class AugmentedHtmlStream extends FilterOutputStream {

    /** Character writer on top of the wrapped stream; receives the processed document. */
    private final Writer writer;
    /** Holds the raw bytes of the body until the stream is closed. */
    private final ByteArrayOutputStream buffer;
    /** Charset used both to decode the buffered bytes and to encode the output. */
    private final Charset charset;
    private final DigestURI url;
    /** Stored for callers' convenience; not read anywhere inside this class. */
    private final byte[] urlhash;
    private final RequestHeader requestHeader;
    /** Guards against the buffered body being emitted more than once. */
    private boolean closed = false;

    /**
     * @param out           stream that finally receives the (possibly rewritten) body
     * @param charset       charset of the body; must not be null
     * @param url           URL of the document being delivered
     * @param urlhash       hash of {@code url}
     * @param requestHeader header of the request that triggered this response
     */
    public AugmentedHtmlStream(OutputStream out, Charset charset, DigestURI url, byte[] urlhash, RequestHeader requestHeader) {
        super(out);
        this.writer = new BufferedWriter(new OutputStreamWriter(out, charset));
        this.buffer = new ByteArrayOutputStream();
        this.charset = charset;
        this.url = url;
        this.urlhash = urlhash;
        this.requestHeader = requestHeader;
    }

    /** Buffers a single byte; nothing is forwarded to the wrapped stream yet. */
    @Override
    public void write(int b) throws IOException {
        this.buffer.write(b);
    }

    /** Buffers a byte range; nothing is forwarded to the wrapped stream yet. */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        this.buffer.write(b, off, len);
    }

    /**
     * Decodes the buffered body, runs it through {@link #process(StringBuffer)},
     * writes the result to the wrapped stream and closes that stream.
     *
     * Calling this method again after the first call has no effect, so the body
     * can never be sent twice.
     *
     * @throws IOException if writing to or closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        if (this.closed) {
            return;
        }
        this.closed = true;
        try {
            final StringBuffer raw = new StringBuffer(this.buffer.toString(this.charset.name()));
            final StringBuffer processed = process(raw);
            this.writer.write(processed.toString());
        } finally {
            // release the wrapped stream even if processing or writing failed
            this.writer.close();
        }
    }

    /**
     * Applies the augmentation step to a document if it is enabled and applicable.
     *
     * @param data the complete decoded document
     * @return the output of {@link AugmentHtmlStream#process} when the config flag
     *         "proxyAugmentation" is set and the normal form of the URL does not
     *         contain "currentyacypeer/"; otherwise {@code data} unchanged
     */
    public StringBuffer process(StringBuffer data) {
        final boolean enabled = Switchboard.getSwitchboard().getConfigBool("proxyAugmentation", false);
        if (!enabled) {
            return data;
        }
        // NOTE(review): "currentyacypeer/" presumably marks pages served by the local peer - confirm
        if (this.url.toNormalform(false, true).contains("currentyacypeer/")) {
            return data;
        }
        return AugmentHtmlStream.process(data, this.charset, this.url, this.requestHeader);
    }

    /**
     * Tells whether a response with the given Content-Type value should be wrapped
     * in this stream.
     *
     * Only the media type in front of the first ';' is inspected; surrounding
     * whitespace and letter case are ignored.
     *
     * @param mime Content-Type value, e.g. "text/html; charset=UTF-8"; may be null
     * @return true if the media type is text/html, false otherwise (including for
     *         null, empty or parameter-only input)
     */
    public static boolean supportsMime(String mime) {
        if (mime == null) {
            return false;
        }
        final int separator = mime.indexOf(';');
        final String mediaType = (separator < 0) ? mime : mime.substring(0, separator);
        return mediaType.trim().equalsIgnoreCase("text/html");
    }
}

@ -77,7 +77,6 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.cora.util.NumberTools;
import net.yacy.document.TextParser; import net.yacy.document.TextParser;
import net.yacy.document.parser.html.ContentTransformer; import net.yacy.document.parser.html.ContentTransformer;
import net.yacy.document.parser.html.Transformer; import net.yacy.document.parser.html.Transformer;
@ -342,7 +341,7 @@ public final class HTTPDProxyHandler {
if ((pos = host.indexOf(':')) < 0) { if ((pos = host.indexOf(':')) < 0) {
port = 80; port = 80;
} else { } else {
port = NumberTools.parseIntDecSubstring(host, pos + 1); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
@ -406,7 +405,7 @@ public final class HTTPDProxyHandler {
cachedResponseHeader, cachedResponseHeader,
"200 OK", "200 OK",
sb.crawler.defaultProxyProfile, sb.crawler.defaultProxyProfile,
false true
); );
final byte[] cacheContent = Cache.getContent(url.hash()); final byte[] cacheContent = Cache.getContent(url.hash());
if (cacheContent != null && response.isFreshForProxy()) { if (cacheContent != null && response.isFreshForProxy()) {
@ -458,7 +457,7 @@ public final class HTTPDProxyHandler {
if ((pos = host.indexOf(':')) < 0) { if ((pos = host.indexOf(':')) < 0) {
port = 80; port = 80;
} else { } else {
port = NumberTools.parseIntDecSubstring(host, pos + 1); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
@ -479,6 +478,8 @@ public final class HTTPDProxyHandler {
final String connectHost = hostPart(host, port, yAddress); final String connectHost = hostPart(host, port, yAddress);
final String getUrl = "http://"+ connectHost + remotePath; final String getUrl = "http://"+ connectHost + remotePath;
requestHeader.remove(HeaderFramework.HOST);
final HTTPClient client = setupHttpClient(requestHeader, connectHost); final HTTPClient client = setupHttpClient(requestHeader, connectHost);
// send request // send request
@ -493,7 +494,14 @@ public final class HTTPDProxyHandler {
throw new Exception(client.getHttpResponse().getStatusLine().toString()); throw new Exception(client.getHttpResponse().getStatusLine().toString());
} }
final ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond); if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
// enable chunk encoding, because we don't know the length after annotating
responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
}
ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond);
// the cache does either not exist or is (supposed to be) stale // the cache does either not exist or is (supposed to be) stale
long sizeBeforeDelete = -1; long sizeBeforeDelete = -1;
@ -528,8 +536,8 @@ public final class HTTPDProxyHandler {
// prepareResponseHeader(responseHeader, res.getHttpVer()); // prepareResponseHeader(responseHeader, res.getHttpVer());
prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString()); prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());
// sending the respond header back to the client if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
if (chunkedOut != null) { // chunked encoding disables somewhere, add it again
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
} }
@ -544,18 +552,22 @@ public final class HTTPDProxyHandler {
if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) { if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) {
final OutputStream outStream = chunkedOut != null ? chunkedOut : respond; OutputStream outStream = chunkedOut != null ? chunkedOut : respond;
final Response response = new Response( final Response response = new Response(
request, request,
requestHeader, requestHeader,
responseHeader, responseHeader,
Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()), Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
sb.crawler.defaultProxyProfile, sb.crawler.defaultProxyProfile,
false true
); );
final String storeError = response.shallStoreCacheForProxy(); final String storeError = response.shallStoreCacheForProxy();
final boolean storeHTCache = response.profile().storeHTCache(); final boolean storeHTCache = response.profile().storeHTCache();
final String supportError = TextParser.supports(response.url(), response.getMimeType()); final String supportError = TextParser.supports(response.url(), response.getMimeType());
if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, url.hash(), requestHeader);
}
if ( if (
/* /*
* Now we store the response into the htcache directory if * Now we store the response into the htcache directory if
@ -624,6 +636,7 @@ public final class HTTPDProxyHandler {
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} }
outStream.close();
if (chunkedOut != null) { if (chunkedOut != null) {
chunkedOut.finish(); chunkedOut.finish();
chunkedOut.flush(); chunkedOut.flush();
@ -673,7 +686,7 @@ public final class HTTPDProxyHandler {
final RequestHeader requestHeader, final RequestHeader requestHeader,
final ResponseHeader cachedResponseHeader, final ResponseHeader cachedResponseHeader,
final byte[] cacheEntry, final byte[] cacheEntry,
final OutputStream respond OutputStream respond
) throws IOException { ) throws IOException {
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
@ -710,6 +723,10 @@ public final class HTTPDProxyHandler {
HTTPDemon.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader); HTTPDemon.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader);
//respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative' //respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
if(AugmentedHtmlStream.supportsMime(cachedResponseHeader.mime())) {
respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, url.hash(), requestHeader);
}
// send also the complete body now from the cache // send also the complete body now from the cache
// simply read the file and transfer to out socket // simply read the file and transfer to out socket
FileUtils.copy(cacheEntry, respond); FileUtils.copy(cacheEntry, respond);
@ -754,7 +771,7 @@ public final class HTTPDProxyHandler {
if ((pos = host.indexOf(':')) < 0) { if ((pos = host.indexOf(':')) < 0) {
port = 80; port = 80;
} else { } else {
port = NumberTools.parseIntDecSubstring(host, pos + 1); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
@ -871,7 +888,7 @@ public final class HTTPDProxyHandler {
if ((pos = host.indexOf(':')) < 0) { if ((pos = host.indexOf(':')) < 0) {
port = 80; port = 80;
} else { } else {
port = NumberTools.parseIntDecSubstring(host, pos + 1); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
@ -1086,6 +1103,7 @@ public final class HTTPDProxyHandler {
forceConnectionClose(conProp); forceConnectionClose(conProp);
} else { } else {
chunkedOut = new ChunkedOutputStream(respond); chunkedOut = new ChunkedOutputStream(respond);
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
} }
responseHeader.remove(HeaderFramework.CONTENT_LENGTH); responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
} }
@ -1155,7 +1173,7 @@ public final class HTTPDProxyHandler {
headers.remove(RequestHeader.X_CACHE_LOOKUP); headers.remove(RequestHeader.X_CACHE_LOOKUP);
// remove transfer encoding header // remove transfer encoding header
headers.remove(HeaderFramework.TRANSFER_ENCODING); // headers.remove(HeaderFramework.TRANSFER_ENCODING);
//removing yacy status headers //removing yacy status headers
headers.remove(HeaderFramework.X_YACY_KEEP_ALIVE_REQUEST_COUNT); headers.remove(HeaderFramework.X_YACY_KEEP_ALIVE_REQUEST_COUNT);
@ -1197,7 +1215,7 @@ public final class HTTPDProxyHandler {
if ((pos = host.indexOf(':')) < 0) { if ((pos = host.indexOf(':')) < 0) {
port = 80; port = 80;
} else { } else {
port = NumberTools.parseIntDecSubstring(host, pos + 1); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }

@ -87,6 +87,7 @@ public class genericImageParser extends AbstractParser implements Parser {
super("Generic Image Parser"); super("Generic Image Parser");
} }
@Override
public Document[] parse( public Document[] parse(
final MultiProtocolURI location, final MultiProtocolURI location,
final String mimeType, final String mimeType,
@ -211,10 +212,12 @@ public class genericImageParser extends AbstractParser implements Parser {
false)}; // images false)}; // images
} }
@Override
public Set<String> supportedMimeTypes() { public Set<String> supportedMimeTypes() {
return SUPPORTED_MIME_TYPES; return SUPPORTED_MIME_TYPES;
} }
@Override
public Set<String> supportedExtensions() { public Set<String> supportedExtensions() {
return SUPPORTED_EXTENSIONS; return SUPPORTED_EXTENSIONS;
} }

@ -0,0 +1,25 @@
package net.yacy.interaction;
import java.nio.charset.Charset;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
/**
 * Entry point of the HTML augmentation step.
 *
 * In its current form no rewriting takes place: the internal "augmented" flag is
 * never set, so the input buffer is always handed back as it came in.
 */
public class AugmentHtmlStream {

    /**
     * Processes a complete document.
     *
     * @param data          the document text
     * @param charset       charset of the document (not used at present)
     * @param url           URL of the document (not used at present)
     * @param requestHeader header of the originating request (not used at present)
     * @return a new buffer holding the document text if it was augmented,
     *         otherwise the {@code data} instance itself
     */
    public static StringBuffer process (StringBuffer data, Charset charset, DigestURI url, RequestHeader requestHeader) {
        // snapshot of the text that a rewrite would operate on
        String document = data.toString();

        // nothing modifies the document yet, so this stays false
        boolean augmented = false;

        return augmented ? new StringBuffer(document) : data;
    }
}

@ -628,7 +628,7 @@ public class ArrayStack implements BLOB {
*/ */
@Override @Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException { public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
if (this.blobs.size() == 0) return null; if (this.blobs == null || this.blobs.size() == 0) return null;
if (this.blobs.size() == 1) { if (this.blobs.size() == 1) {
final blobItem bi = this.blobs.get(0); final blobItem bi = this.blobs.get(0);
return bi.blob.get(key); return bi.blob.get(key);

Loading…
Cancel
Save