(gzip decompression, httploader, robots, ...) + enable proxy-crawling while log is fine
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7001 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
parent
a55af783bf
commit
15e8c13526
@ -0,0 +1,29 @@
|
||||
package net.yacy.cora.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.entity.HttpEntityWrapper;
|
||||
|
||||
public class GzipDecompressingEntity extends HttpEntityWrapper {
|
||||
|
||||
public GzipDecompressingEntity(final HttpEntity entity) {
|
||||
super(entity);
|
||||
}
|
||||
|
||||
public InputStream getContent() throws IOException, IllegalStateException {
|
||||
|
||||
// the wrapped entity's getContent() decides about repeatability
|
||||
InputStream wrappedin = wrappedEntity.getContent();
|
||||
|
||||
return new GZIPInputStream(wrappedin);
|
||||
}
|
||||
|
||||
public long getContentLength() {
|
||||
// length of ungzipped content not known in advance
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
package net.yacy.cora.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.http.HttpException;
|
||||
import org.apache.http.HttpRequest;
|
||||
import org.apache.http.HttpRequestInterceptor;
|
||||
import org.apache.http.protocol.HttpContext;
|
||||
|
||||
public class GzipRequestInterceptor implements HttpRequestInterceptor {
|
||||
|
||||
private static final String ACCEPT_ENCODING = "Accept-Encoding";
|
||||
private static final String GZIP_CODEC = "gzip";
|
||||
|
||||
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
|
||||
if (!request.containsHeader(ACCEPT_ENCODING)) {
|
||||
request.addHeader(ACCEPT_ENCODING, GZIP_CODEC);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
package net.yacy.cora.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HeaderElement;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpException;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.HttpResponseInterceptor;
|
||||
import org.apache.http.protocol.HttpContext;
|
||||
|
||||
public class GzipResponseInterceptor implements HttpResponseInterceptor {
|
||||
|
||||
private static final String GZIP_CODEC = "gzip";
|
||||
|
||||
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
|
||||
if (context == null) {
|
||||
throw new IllegalArgumentException("HTTP context may not be null");
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
if (entity != null) {
|
||||
Header ceheader = entity.getContentEncoding();
|
||||
if (ceheader != null) {
|
||||
HeaderElement[] codecs = ceheader.getElements();
|
||||
for (int i = 0; i < codecs.length; i++) {
|
||||
if (codecs[i].getName().equalsIgnoreCase(GZIP_CODEC)) {
|
||||
// response.removeHeader(ceheader);
|
||||
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue