Add a max acceptable limit to the size of Solr responses on p2p search

This follows the activation of gzip compression on responses, and ensures
that the uncompressed content can fit in the available memory.
pull/183/head
luccioman 7 years ago
parent de4ea95687
commit bd4cfeda3f

@ -66,6 +66,7 @@ import org.apache.solr.update.UpdateShardHandler.IdleConnectionsEvictor;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.http.StrictSizeLimitResponseInterceptor;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory;
@ -143,7 +144,7 @@ public class RemoteInstance implements SolrInstance {
}
return instances;
}
/**
* @param url
* the remote Solr URL. A default localhost URL is assumed when null.
@ -161,6 +162,30 @@ public class RemoteInstance implements SolrInstance {
*/
public RemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName,
final int timeout, final boolean trustSelfSignedOnAuthenticatedServer) throws IOException {
/* Delegate to the full constructor : Long.MAX_VALUE as maxBytesPerResponse means no limit on the responses size */
this(url, coreNames, defaultCoreName, timeout, trustSelfSignedOnAuthenticatedServer, Long.MAX_VALUE);
}
/**
* @param url
* the remote Solr URL. A default localhost URL is assumed when null.
* @param coreNames
* the Solr core names for the main collection and the webgraph
* @param defaultCoreName
* the core name of the main collection
* @param timeout
* the connection timeout in milliseconds
* @param trustSelfSignedOnAuthenticatedServer
* when true, self-signed certificates are accepted for an https
* connection to a remote server with authentication credentials
* @param maxBytesPerResponse
* maximum acceptable decompressed size in bytes for a response from
* the remote Solr server. Negative value or Long.MAX_VALUE means no
* limit.
* @throws IOException
* when a connection could not be opened to the remote Solr instance
*/
public RemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName,
final int timeout, final boolean trustSelfSignedOnAuthenticatedServer, final long maxBytesPerResponse) throws IOException {
this.timeout = timeout;
this.server= new HashMap<String, ConcurrentUpdateSolrClient>();
this.solrurl = url == null ? "http://127.0.0.1:8983/solr/" : url; // that should work for the example configuration of solr 4.x.x
@ -212,10 +237,10 @@ public class RemoteInstance implements SolrInstance {
}
}
if (solraccount.length() > 0) {
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer);
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse);
} else if(u.isHTTPS()){
/* Here we must trust self-signed certificates as most peers with SSL enabled use such certificates */
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true);
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true, maxBytesPerResponse);
} else {
/* Build a http client using the Solr utils as in the HttpSolrClient constructor implementation.
* The main difference is that a shared connection manager is used (configured in the buildConnectionManager() function) */
@ -224,9 +249,20 @@ public class RemoteInstance implements SolrInstance {
/* Accept gzip compression of responses to reduce network usage */
params.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, true);
this.client = HttpClientUtil.createClient(params, CONNECTION_MANAGER);
if(this.client instanceof DefaultHttpClient && this.client.getParams() != null) {
/* Set the maximum time to get a connection from the shared connections pool */
HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout);
if(this.client instanceof DefaultHttpClient) {
if(this.client.getParams() != null) {
/* Set the maximum time to get a connection from the shared connections pool */
HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout);
}
if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) {
/*
* Add in last position the eventual interceptor limiting the response size, so
* that this is the decompressed amount of bytes that is considered
*/
((DefaultHttpClient)this.client).addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse),
((DefaultHttpClient)this.client).getResponseInterceptorCount());
}
}
}
@ -298,10 +334,14 @@ public class RemoteInstance implements SolrInstance {
* @param solraccount eventual user name used to authenticate on the target Solr
* @param solrpw eventual password used to authenticate on the target Solr
* @param trustSelfSignedCertificates when true, https connections to an host providing a self-signed certificate are accepted
* @param maxBytesPerResponse
* maximum acceptable decompressed size in bytes for a response from
* the remote Solr server. Negative value or Long.MAX_VALUE means no
* limit.
* @return a new apache HttpClient instance usable as a custom http client by SolrJ
*/
private static HttpClient buildCustomHttpClient(final int timeout, final MultiProtocolURL u, final String solraccount, final String solrpw,
final String host, final boolean trustSelfSignedCertificates) {
final String host, final boolean trustSelfSignedCertificates, final long maxBytesPerResponse) {
/* Important note : use of deprecated Apache classes is required because SolrJ still use them internally (see HttpClientUtil).
* Upgrade only when Solr implementation will become compatible */
@ -362,6 +402,15 @@ public class RemoteInstance implements SolrInstance {
result.setCredentialsProvider(credsProvider);
}
if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) {
/*
* Add in last position the eventual interceptor limiting the response size, so
* that this is the decompressed amount of bytes that is considered
*/
result.addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse),
result.getResponseInterceptorCount());
}
return result;
}

@ -0,0 +1,92 @@
// StrictSizeLimitEntityWrapper.java
// ---------------------------
// Copyright 2018 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.protocol.http;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.http.HttpEntity;
import org.apache.http.entity.HttpEntityWrapper;
import net.yacy.cora.util.StrictLimitInputStream;
/**
 * HTTP entity wrapper used to strictly limit the size of the response content
 * fetched from an http connection.
 * <p>
 * The content stream of the wrapped entity is wrapped in a
 * {@link StrictLimitInputStream} configured with the maximum amount of bytes.
 * That limited stream is used both by {@link #getContent()} and by
 * {@link #writeTo(OutputStream)}, so that no access path to the body can
 * bypass the limit.
 * </p>
 */
public class StrictSizeLimitEntityWrapper extends HttpEntityWrapper {

    /** Size in bytes of the buffer used to copy the content in {@link #writeTo(OutputStream)} */
    private static final int COPY_BUFFER_SIZE = 4096;

    /** Reusable wrapped content stream */
    private InputStream content;

    /** Maximum amount of bytes to fetch from the http response body */
    private final long maxBytes;

    /**
     * @param wrappedEntity
     *            the http entity to wrap. Must not be null.
     * @param maxBytes
     *            the maximum amount of bytes to fetch from the http response body
     * @throws IllegalArgumentException
     *             when wrappedEntity parameter is null or when maxBytes value is
     *             lower than zero.
     */
    public StrictSizeLimitEntityWrapper(final HttpEntity wrappedEntity, final long maxBytes) {
        super(wrappedEntity);
        if (wrappedEntity == null) {
            throw new IllegalArgumentException("The wrappedEntity parameter must not be null.");
        }
        if (maxBytes < 0) {
            throw new IllegalArgumentException("The maxBytes parameter must be greater or equal than zero.");
        }
        this.maxBytes = maxBytes;
    }

    /**
     * @return a size limiting wrapper on the wrapped entity content stream, or
     *         null when the wrapped entity has no content stream
     * @throws IOException
     *             when an error occurred while accessing the wrapped stream
     */
    private InputStream getWrappedStream() throws IOException {
        final InputStream in = this.wrappedEntity.getContent();
        if (in == null) {
            return null;
        }
        return new StrictLimitInputStream(in, this.maxBytes);
    }

    /**
     * @return the size limited content stream. The same instance is returned on
     *         each call once it has been created.
     * @throws IOException
     *             when an error occurred while accessing the wrapped stream
     */
    @Override
    public InputStream getContent() throws IOException {
        if (this.content == null) {
            this.content = this.getWrappedStream();
        }
        return this.content;
    }

    /**
     * Copy the entity content to the given output stream, reading it from the
     * size limited stream returned by {@link #getContent()} instead of delegating
     * to the wrapped entity, which would otherwise ignore the limit. The content
     * stream is closed once the copy is terminated.
     *
     * @param outstream
     *            the stream to write the entity content to. Must not be null.
     * @throws IllegalArgumentException
     *             when the outstream parameter is null
     * @throws IOException
     *             when a read or write error occurred
     */
    @Override
    public void writeTo(final OutputStream outstream) throws IOException {
        if (outstream == null) {
            throw new IllegalArgumentException("The outstream parameter must not be null.");
        }
        final InputStream in = this.getContent();
        if (in == null) {
            /* No content stream available : nothing to copy */
            return;
        }
        try {
            final byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            /* NOTE(review): StrictLimitInputStream presumably fails the read once more than maxBytes are consumed - confirm against its implementation */
            while ((read = in.read(buffer)) != -1) {
                outstream.write(buffer, 0, read);
            }
        } finally {
            in.close();
        }
    }
}

@ -0,0 +1,64 @@
// StrictSizeLimitResponseInterceptor.java
// ---------------------------
// Copyright 2018 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.protocol.http;
import java.io.IOException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.protocol.HttpContext;
/**
 * HTTP response interceptor replacing the entity of each processed response
 * with a {@link StrictSizeLimitEntityWrapper}, configured with the maximum
 * amount of bytes given at construction time.
 */
public class StrictSizeLimitResponseInterceptor implements HttpResponseInterceptor {

    /** Upper bound, in bytes, passed to each entity wrapper created by this interceptor */
    private final long maxBytes;

    /**
     * @param maxBytes
     *            the maximum amount of bytes to fetch from the HTTP response body
     * @throws IllegalArgumentException
     *             when the maxBytes value is lower than zero
     */
    public StrictSizeLimitResponseInterceptor(final long maxBytes) {
        if (maxBytes >= 0) {
            this.maxBytes = maxBytes;
        } else {
            throw new IllegalArgumentException("The maxBytes parameter must be greater or equals than zero");
        }
    }

    /**
     * Wrap the response entity, when there is one, in a size limiting entity
     * wrapper. Responses without entity are left untouched.
     */
    @Override
    public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
        final HttpEntity body = response.getEntity();
        if (body == null) {
            /* Nothing to limit on a response without body */
            return;
        }
        final HttpEntity limitedBody = new StrictSizeLimitEntityWrapper(body, this.maxBytes);
        response.setEntity(limitedBody);
    }
}

@ -110,6 +110,7 @@ import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceContainerCache;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
@ -1086,15 +1087,17 @@ public final class Protocol {
* @param messageBegin beginning of the log message
* @param ex exception to log
*/
private void logError(String messageBegin, Exception ex) {
String message = ex.getMessage();
if(message == null) {
message = "no details";
} else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){
/* Strip too large details to avoid polluting this log with complete remote stack traces */
message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "...";
private void logError(final String messageBegin, final Exception ex) {
if(log.isFine()) {
String message = ex.getMessage();
if(message == null) {
message = "no details";
} else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){
/* Strip too large details to avoid polluting this log with complete remote stack traces */
message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "...";
}
log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message);
}
log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message);
}
@Override
@ -1106,8 +1109,13 @@ public final class Protocol {
SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED,
SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED_DEFAULT);
}
this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer); // this is a 'patch configuration' which considers 'solr' as default collection
/* Add a limit to the maximum acceptable size of the remote peer Solr response. This can help prevent out of memory errors when :
* - this peer is overloaded
* - the remote peer has indexed documents with excessively large metadata (too large at least to fit within this peer resources)
* - the remote peer is a malicious one and would like to trigger a denial of service */
final long maxBytesPerResponse = MemoryControl.available() / 4;
this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse); // this is a 'patch configuration' which considers 'solr' as default collection
try {
boolean useBinaryResponseWriter = SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT;
if (Switchboard.getSwitchboard() != null) {

Loading…
Cancel
Save