Enable API calls with very long URLs

pull/8/head
Michael Peter Christen 10 years ago
parent 1481a8ab56
commit 0710648c31

@ -341,33 +341,34 @@ public class HTTPClient {
throw new IOException(e.getMessage()); // can be caused at java.net.URI.create()
}
if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
if (localhost && pass != null) {
CredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(
AuthScope.ANY, // thats ok since we tested for localhost!
new UsernamePasswordCredentials(username, pass));
CloseableHttpClient httpclient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
byte[] content = null;
if (!localhost || pass == null) {
return getContentBytes(httpGet, maxBytes, concurrent);
}
CredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(
AuthScope.ANY, // thats ok since we tested for localhost!
new UsernamePasswordCredentials(username, pass));
CloseableHttpClient httpclient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider).build();
byte[] content = null;
try {
this.httpResponse = httpclient.execute(httpGet);
try {
this.httpResponse = httpclient.execute(httpGet);
try {
HttpEntity httpEntity = this.httpResponse.getEntity();
if (httpEntity != null) {
if (getStatusCode() == 200 && (maxBytes < 0 || httpEntity.getContentLength() < maxBytes)) {
content = getByteArray(httpEntity, maxBytes);
}
// Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
EntityUtils.consume(httpEntity);
HttpEntity httpEntity = this.httpResponse.getEntity();
if (httpEntity != null) {
if (getStatusCode() == 200 && (maxBytes < 0 || httpEntity.getContentLength() < maxBytes)) {
content = getByteArray(httpEntity, maxBytes);
}
} finally {
this.httpResponse.close();
// Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
EntityUtils.consume(httpEntity);
}
} finally {
httpclient.close();
this.httpResponse.close();
}
return content;
} finally {
httpclient.close();
}
return getContentBytes(httpGet, maxBytes, concurrent);
return content;
}
/**
@ -498,8 +499,8 @@ public class HTTPClient {
*/
public byte[] POSTbytes(final MultiProtocolURL url, final String vhost, final Map<String, ContentBody> post, final boolean usegzip, final boolean concurrent) throws IOException {
final HttpPost httpPost = new HttpPost(url.toNormalform(true));
setHost(vhost); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
final boolean localhost = Domains.isLocalhost(url.getHost());
if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
if (vhost == null) setHost(Domains.LOCALHOST);
final MultipartEntityBuilder entityBuilder = MultipartEntityBuilder.create();

@ -34,11 +34,14 @@ import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeMap;
import org.apache.http.entity.mime.content.ContentBody;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -238,14 +241,34 @@ public class WorkTables extends Tables {
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
l.put(url.toNormalform(true), client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
if (theapicall.length() > 1000) {
// use a POST to execute the call
int ai = theapicall.indexOf('?');
String[] tacs = theapicall.substring(ai + 1).split("&");
Map<String, ContentBody> post = new HashMap<>();
for (String a: tacs) {
int f = a.indexOf('=');
if (f < 0) continue;
post.put(a.substring(0, f), UTF8.StringBody(a.substring(f + 1)));
}
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall.substring(0, ai));
try {
client.POSTbytes(url, "localhost", post, false, false);
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
} else {
// use a GET to execute the call
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
l.put(url.toNormalform(true), client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);

@ -312,6 +312,7 @@ public final class Fulltext {
if (connector == null) return;
String id = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
assert url != null && url.length() < 30000;
ConcurrentLog.info("Fulltext", "indexing: " + id + " " + url);
try {
connector.add(doc);

Loading…
Cancel
Save