always use HTTPClient with the 'try-with-resources' pattern to free up resources
pull/436/head
sgaebel 3 years ago
parent 69adaa9f55
commit cdf901270c

@@ -196,29 +196,29 @@ public class IndexImportMediawiki_p {
* @return the last modified date for the file at fileURL, or 0L when unknown or when an error occurred
*/
private static long getLastModified(MultiProtocolURL fileURL) {
long lastModified = 0l;
try {
if (fileURL.isHTTP() || fileURL.isHTTPS()) {
/* http(s) : we do not use MultiprotocolURL.lastModified() which always returns 0L for these protocols */
HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
HttpResponse headResponse = httpClient.HEADResponse(fileURL, false);
if (headResponse != null && headResponse.getStatusLine() != null
&& headResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
Header lastModifiedHeader = headResponse
.getFirstHeader(HeaderFramework.LAST_MODIFIED);
if (lastModifiedHeader != null) {
Date lastModifiedDate = HeaderFramework.parseHTTPDate(lastModifiedHeader.getValue());
if(lastModifiedDate != null) {
lastModified = lastModifiedDate.getTime();
}
}
}
try (HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
HttpResponse headResponse = httpClient.HEADResponse(fileURL, false);
if (headResponse != null && headResponse.getStatusLine() != null
&& headResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
Header lastModifiedHeader = headResponse
.getFirstHeader(HeaderFramework.LAST_MODIFIED);
if (lastModifiedHeader != null) {
Date lastModifiedDate = HeaderFramework.parseHTTPDate(lastModifiedHeader.getValue());
if(lastModifiedDate != null) {
return lastModifiedDate.getTime();
}
}
}
}
} else {
lastModified = fileURL.lastModified();
return fileURL.lastModified();
}
} catch (IOException ignored) {
ConcurrentLog.warn("IndexImportMediawiki_p", "Could not retrieve last modified date for dump file at " + fileURL);
}
return lastModified;
return 0l;
}
}

@@ -2538,7 +2538,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return new ByteArrayInputStream(b);
}
if (isHTTP() || isHTTPS()) {
final HTTPClient client = new HTTPClient(agent);
try (final HTTPClient client = new HTTPClient(agent)){
client.setHost(getHost());
client.GET(this, false);
if (client.getStatusCode() != HttpStatus.SC_OK) {
@@ -2546,6 +2546,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
"\nServer returned status: " + client.getHttpResponse().getStatusLine());
}
return new HTTPInputStream(client);
}
}
return null;
@@ -2562,9 +2563,10 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
return b;
}
if (isHTTP() || isHTTPS()) {
final HTTPClient client = new HTTPClient(agent);
client.setHost(getHost());
return client.GETbytes(this, username, pass, false);
try (final HTTPClient client = new HTTPClient(agent)) {
client.setHost(getHost());
return client.GETbytes(this, username, pass, false);
}
}
return null;

@@ -297,10 +297,9 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme
String searchurl = this.parseSearchTemplate(baseurl, searchTerms, startIndex, count);
try {
DigestURL aurl = new DigestURL(searchurl);
try {
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
this.lastaccesstime = System.currentTimeMillis();
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
byte[] result = httpClient.GETbytes(aurl, null, null, false);
if(result == null) {

@@ -121,8 +121,9 @@ public class SRURSSConnector {
parts.put("resource", UTF8.StringBody(global ? "global" : "local"));
parts.put("nav", UTF8.StringBody("none"));
// result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
final HTTPClient httpClient = new HTTPClient(agent);
result = httpClient.POSTbytes(new MultiProtocolURL(rssSearchServiceURL), uri.getHost(), parts, false, false);
try (final HTTPClient httpClient = new HTTPClient(agent)) {
result = httpClient.POSTbytes(new MultiProtocolURL(rssSearchServiceURL), uri.getHost(), parts, false, false);
}
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {

@@ -49,7 +49,7 @@ public class Network {
*/
public static Peers getNetwork(final String address) throws IOException {
Peers peers = new Peers();
final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
try (final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
final byte[] content = httpclient.GETbytes("http://" + address + "/Network.xml?page=1&maxCount=1000&ip=", null, null, false);
ByteArrayInputStream bais = new ByteArrayInputStream(content);
Document doc = null;
@@ -74,6 +74,7 @@ public class Network {
//log.info(peer.toString());
}
}
}
return peers;
}

@@ -140,123 +140,124 @@ public final class HTTPLoader {
final RequestHeader requestHeader = createRequestheader(request, agent);
// HTTP-Client
final HTTPClient client = new HTTPClient(agent);
client.setRedirecting(false); // we want to handle redirection
// ourselves, so we don't index pages
// twice
client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
client.GET(url, false);
final StatusLine statusline = client.getHttpResponse().getStatusLine();
final int statusCode = statusline.getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {
client.close();
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline, responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// we have two use cases here: loading from a crawl or just
// loading the url. Check this:
if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
// put redirect url on the crawler queue to repeat a
// double-check
/* We have to clone the request instance and not to modify directly its URL,
* otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
Request redirectedRequest = new Request(request.initiator(),
redirectionUrl,
request.referrerhash(),
request.name(),
request.appdate(),
request.profileHandle(),
request.depth(),
request.timezoneOffset());
String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
if(rejectReason != null) {
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
try (final HTTPClient client = new HTTPClient(agent)) {
client.setRedirecting(false); // we want to handle redirection
// ourselves, so we don't index pages
// twice
client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
client.GET(url, false);
final StatusLine statusline = client.getHttpResponse().getStatusLine();
final int statusCode = statusline.getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {
client.close();
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline, responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// we have two use cases here: loading from a crawl or just
// loading the url. Check this:
if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
// put redirect url on the crawler queue to repeat a
// double-check
/* We have to clone the request instance and not to modify directly its URL,
* otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
Request redirectedRequest = new Request(request.initiator(),
redirectionUrl,
request.referrerhash(),
request.name(),
request.appdate(),
request.profileHandle(),
request.depth(),
request.timezoneOffset());
String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
if(rejectReason != null) {
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
}
// in the end we must throw an exception (even if this is
// not an error, just to abort the current process
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to "
+ redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
}
// in the end we must throw an exception (even if this is
// not an error, just to abort the current process
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to "
+ redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
}
// if we are already doing a shutdown we don't need to retry
// crawling
if (Thread.currentThread().isInterrupted()) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
throw new IOException(
"CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
}
// check if the redirected URL is the same as the requested URL
// this shortcuts a time-out using retryCount
if (redirectionUrl.equals(url)) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirect to same url", -1);
throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
}
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$");
} else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) {
// the transfer is ok
/*
* When content is not large (less than Response.CRAWLER_MAX_SIZE_TO_CACHE), we have better cache it if cache is enabled and url is not local
*/
long contentLength = client.getHttpResponse().getEntity().getContentLength();
InputStream contentStream;
if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) {
byte[] content = null;
try {
content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
Cache.store(url, responseHeader, content);
} catch (final IOException e) {
this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e);
} finally {
client.close();
}
contentStream = new ByteArrayInputStream(content);
} else {
/*
* Content length may already be known now : check it before opening a stream
*/
if (maxFileSize >= 0 && contentLength > maxFileSize) {
throw new IOException("Content to download exceed maximum value of " + maxFileSize + " bytes");
// if we are already doing a shutdown we don't need to retry
// crawling
if (Thread.currentThread().isInterrupted()) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
throw new IOException(
"CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
}
// check if the redirected URL is the same as the requested URL
// this shortcuts a time-out using retryCount
if (redirectionUrl.equals(url)) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirect to same url", -1);
throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
}
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$");
} else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) {
// the transfer is ok
/*
* Create a HTTPInputStream delegating to
* client.getContentstream(). Close method will ensure client is
* properly closed.
* When content is not large (less than Response.CRAWLER_MAX_SIZE_TO_CACHE), we have better cache it if cache is enabled and url is not local
*/
contentStream = new HTTPInputStream(client);
/* Anticipated content length may not be already known or incorrect : let's apply now the same eventual content size restriction as when loading in a byte array */
if(maxFileSize >= 0) {
contentStream = new StrictLimitInputStream(contentStream, maxFileSize,
"Content to download exceed maximum value of " + Formatter.bytesToString(maxFileSize));
long contentLength = client.getHttpResponse().getEntity().getContentLength();
InputStream contentStream;
if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) {
byte[] content = null;
try {
content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
Cache.store(url, responseHeader, content);
} catch (final IOException e) {
this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e);
} finally {
client.close();
}
contentStream = new ByteArrayInputStream(content);
} else {
/*
* Content length may already be known now : check it before opening a stream
*/
if (maxFileSize >= 0 && contentLength > maxFileSize) {
throw new IOException("Content to download exceed maximum value of " + maxFileSize + " bytes");
}
/*
* Create a HTTPInputStream delegating to
* client.getContentstream(). Close method will ensure client is
* properly closed.
*/
contentStream = new HTTPInputStream(client);
/* Anticipated content length may not be already known or incorrect : let's apply now the same eventual content size restriction as when loading in a byte array */
if(maxFileSize >= 0) {
contentStream = new StrictLimitInputStream(contentStream, maxFileSize,
"Content to download exceed maximum value of " + Formatter.bytesToString(maxFileSize));
}
}
return new StreamResponse(new Response(request, requestHeader, responseHeader, profile, false, null), contentStream);
} else {
client.close();
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + statusline
+ "' for URL '" + requestURLString + "'$");
}
return new StreamResponse(new Response(request, requestHeader, responseHeader, profile, false, null), contentStream);
} else {
client.close();
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + statusline
+ "' for URL '" + requestURLString + "'$");
}
}
@@ -364,90 +365,91 @@ public final class HTTPLoader {
final RequestHeader requestHeader = createRequestheader(request, agent);
// HTTP-Client
final HTTPClient client = new HTTPClient(agent);
client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize, false);
final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client.getHttpResponse().getStatusLine(),
responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// we have two use cases here: loading from a crawl or just loading the url. Check this:
if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
// put redirect url on the crawler queue to repeat a double-check
/* We have to clone the request instance and not to modify directly its URL,
* otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
Request redirectedRequest = new Request(request.initiator(),
redirectionUrl,
request.referrerhash(),
request.name(),
request.appdate(),
request.profileHandle(),
request.depth(),
request.timezoneOffset());
String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
// in the end we must throw an exception (even if this is not an error, just to abort the current process
if(rejectReason != null) {
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
try (final HTTPClient client = new HTTPClient(agent)) {
client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet());
// send request
final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize, false);
final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client.getHttpResponse().getStatusLine(),
responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// we have two use cases here: loading from a crawl or just loading the url. Check this:
if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
// put redirect url on the crawler queue to repeat a double-check
/* We have to clone the request instance and not to modify directly its URL,
* otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
Request redirectedRequest = new Request(request.initiator(),
redirectionUrl,
request.referrerhash(),
request.name(),
request.appdate(),
request.profileHandle(),
request.depth(),
request.timezoneOffset());
String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
// in the end we must throw an exception (even if this is not an error, just to abort the current process
if(rejectReason != null) {
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
}
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
}
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
}
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
} else if (responseBody == null) {
// no response, reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
} else if (statusCode == 200 || statusCode == 203) {
// the transfer is ok
// we write the new cache entry to file system directly
final long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
// check length again in case it was not possible to get the length before loading
if (maxFileSize >= 0 && contentLength > maxFileSize) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)$");
}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
}
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
} else if (responseBody == null) {
// no response, reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
} else if (statusCode == 200 || statusCode == 203) {
// the transfer is ok
// we write the new cache entry to file system directly
final long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
// check length again in case it was not possible to get the length before loading
if (maxFileSize >= 0 && contentLength > maxFileSize) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)$");
// create a new cache entry
response = new Response(
request,
requestHeader,
responseHeader,
profile,
false,
responseBody
);
return response;
} else {
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
}
// create a new cache entry
response = new Response(
request,
requestHeader,
responseHeader,
profile,
false,
responseBody
);
return response;
} else {
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
}
}
@@ -484,9 +486,9 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);
final HTTPClient client = new HTTPClient(agent);
client.setTimout(20000);
client.setHeader(requestHeader.entrySet());
try (final HTTPClient client = new HTTPClient(agent)) {
client.setTimout(20000);
client.setHeader(requestHeader.entrySet());
final byte[] responseBody = client.GETbytes(request.url(), null, null, false);
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(code, client.getHttpResponse().getAllHeaders());
@@ -539,6 +541,7 @@ public final class HTTPLoader {
// if the response has not the right response type then reject file
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
}
}
return response;
}

@@ -327,50 +327,53 @@ public class WorkTables extends Tables {
* @return a map of the called urls and the http status code of the api call or -1 if any other IOException occurred
*/
public Map<String, Integer> execAPICalls(String host, int port, Collection<String> pks, final String username, final String pass) {
// now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setTimout(120000);
Tables.Row row;
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
for (final String pk: pks) {
row = null;
try {
row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final SpaceExceededException e) {
ConcurrentLog.logException(e);
}
if (row == null) continue;
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
final Map<String, String> attributes = url.getAttributes();
final boolean isTokenProtectedAPI = attributes.containsKey(TransactionManager.TRANSACTION_TOKEN_PARAM);
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
if (theapicall.length() > 1000 || isTokenProtectedAPI) {
// use a POST to execute the call
execPostAPICall(host, port, username, pass, client, l, url, isTokenProtectedAPI);
} else {
// use a GET to execute the call
ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
if(client.getStatusCode() == HttpStatus.SC_METHOD_NOT_ALLOWED) {
/* GET method not allowed (HTTP 450 status) : this may be an old API entry,
* now restricted to HTTP POST and requiring a transaction token. We try now with POST. */
execPostAPICall(host, port, username, pass, client, l, url, true);
} else {
l.put(url.toNormalform(true), client.getStatusCode());
// now call the api URLs and store the result status
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
client.setTimout(120000);
Tables.Row row;
for (final String pk: pks) {
row = null;
try {
row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final SpaceExceededException e) {
ConcurrentLog.logException(e);
}
if (row == null) continue;
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
final Map<String, String> attributes = url.getAttributes();
final boolean isTokenProtectedAPI = attributes.containsKey(TransactionManager.TRANSACTION_TOKEN_PARAM);
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
if (theapicall.length() > 1000 || isTokenProtectedAPI) {
// use a POST to execute the call
execPostAPICall(host, port, username, pass, client, l, url, isTokenProtectedAPI);
} else {
// use a GET to execute the call
ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
if(client.getStatusCode() == HttpStatus.SC_METHOD_NOT_ALLOWED) {
/* GET method not allowed (HTTP 450 status) : this may be an old API entry,
* now restricted to HTTP POST and requiring a transaction token. We try now with POST. */
execPostAPICall(host, port, username, pass, client, l, url, true);
} else {
l.put(url.toNormalform(true), client.getStatusCode());
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
}
} catch (IOException e) {
ConcurrentLog.logException(e);
}
return l;
}
@@ -447,11 +450,10 @@ public class WorkTables extends Tables {
*/
public static int execGetAPICall(String host, int port, String path, byte[] pk, final String username, final String pass) {
// now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setTimout(120000);
String url = "http://" + host + ":" + port + path;
if (pk != null) url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(pk);
try {
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
client.setTimout(120000);
client.GETbytes(url, username, pass, false);
return client.getStatusCode();
} catch (final IOException e) {

@@ -114,9 +114,8 @@ public class sitemapParser extends AbstractParser implements Parser {
public static SitemapReader parse(final DigestURL sitemapURL, final ClientIdentification.Agent agent) throws IOException {
// download document
ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
final HTTPClient client = new HTTPClient(agent);
// client.setHeader(requestHeader.entrySet());
try {
try (final HTTPClient client = new HTTPClient(agent)) {
client.GET(sitemapURL.toNormalform(false), false);
if (client.getStatusCode() != 200) {
throw new IOException("Unable to download the sitemap file " + sitemapURL +

@@ -147,19 +147,12 @@ public class opensearchdescriptionReader extends DefaultHandler {
public opensearchdescriptionReader(final String path, final ClientIdentification.Agent agent) {
this();
this.agent = agent;
HTTPClient www = new HTTPClient(agent);
try {
try (HTTPClient www = new HTTPClient(agent)) {
www.GET(path, false);
final SAXParser saxParser = getParser();
saxParser.parse(www.getContentstream(), this);
} catch (final Exception e) {
ConcurrentLog.logException(e);
} finally {
try {
www.close();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
}
@ -170,8 +163,7 @@ public class opensearchdescriptionReader extends DefaultHandler {
this.parsingTextValue = false;
this.rssurl = null;
this.atomurl = null;
HTTPClient www = new HTTPClient(this.agent);
try {
try (HTTPClient www = new HTTPClient(this.agent)) {
www.GET(path, false);
final SAXParser saxParser = getParser();
try {
@ -185,12 +177,6 @@ public class opensearchdescriptionReader extends DefaultHandler {
} catch (final Exception e) {
ConcurrentLog.warn("opensearchdescriptionReader", "parse exception: " + e);
return false;
} finally {
try {
www.close();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
}

@ -132,12 +132,11 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
RequestHeader proxyHeaders = ProxyHandler.convertHeaderFromJetty(request);
setProxyHeaderForClient(request, proxyHeaders);
final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent);
client.setTimout(timeout);
client.setHeader(proxyHeaders.entrySet());
client.setRedirecting(false);
// send request
try {
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent)) {
client.setTimout(timeout);
client.setHeader(proxyHeaders.entrySet());
client.setRedirecting(false);
String queryString = request.getQueryString() != null ? "?" + request.getQueryString() : "";
DigestURL digestURI = new DigestURL(request.getScheme(), request.getServerName(), request.getServerPort(), request.getRequestURI() + queryString);
if (request.getMethod().equals(HeaderFramework.METHOD_GET)) {
@ -219,8 +218,6 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
}
} catch (final SocketException se) {
throw new ServletException("Socket Exception: " + se.getMessage());
} finally {
client.close();
}
// we handled this request, break out of handler chain

@ -159,11 +159,11 @@ public final class Protocol {
final String path,
final Map<String, ContentBody> parts,
final int timeout) throws IOException {
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
httpClient.setTimout(timeout);
MultiProtocolURL targetURL = new MultiProtocolURL(targetBaseURL, path);
this.result = httpClient.POSTbytes(targetURL, Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, false,
true);
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
httpClient.setTimout(timeout);
MultiProtocolURL targetURL = new MultiProtocolURL(targetBaseURL, path);
this.result = httpClient.POSTbytes(targetURL, Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, false, true);
}
}
/**
@ -197,19 +197,16 @@ public final class Protocol {
final String salt = crypt.randomSalt();
long responseTime = Long.MAX_VALUE;
byte[] content = null;
try {
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 30000)) {
// generate request
final Map<String, ContentBody> parts =
basicRequestParts(Switchboard.getSwitchboard(), null, salt);
final Map<String, ContentBody> parts = basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("count", UTF8.StringBody("20"));
parts.put("magic", UTF8.StringBody(Long.toString(Network.magic)));
parts.put("seed", UTF8.StringBody(mySeed.genSeedStr(salt)));
// send request
final long start = System.currentTimeMillis();
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 30000);
content =
httpClient.POSTbytes(
content = httpClient.POSTbytes(
new MultiProtocolURL(targetBaseURL, "/yacy/hello.html"),
Seed.b64Hash2hexHash(targetHash) + ".yacyh",
parts,
@ -433,41 +430,44 @@ public final class Protocol {
parts.put("count", UTF8.StringBody(Integer.toString(maxCount)));
parts.put("time", UTF8.StringBody(Long.toString(maxTime)));
// final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, (int) maxTime);
RSSReader reader = null;
for (final String ip: target.getIPs()) {
MultiProtocolURL targetBaseURL = null;
try {
targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
byte[] result;
try {
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
/* Failed with https : retry with http */
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
if(result != null) {
/* Got something with http : mark peer SSL as unavailable on target peer */
markSSLUnavailableOnPeer(seedDB, target, ip, "yacyClient.queryRemoteCrawlURLs");
}
} else {
throw e;
}
}
reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
} catch(MalformedURLException e) {
Network.log.warn("yacyClient.queryRemoteCrawlURLs malformed target URL for peer '" + target.getName()
+ "' on address : " + ip);
} catch (final IOException e ) {
reader = null;
Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
}
if (reader != null) {
break;
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, (int) maxTime)) {
for (final String ip: target.getIPs()) {
MultiProtocolURL targetBaseURL = null;
try {
targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
byte[] result;
try {
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
/* Failed with https : retry with http */
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true);
if(result != null) {
/* Got something with http : mark peer SSL as unavailable on target peer */
markSSLUnavailableOnPeer(seedDB, target, ip, "yacyClient.queryRemoteCrawlURLs");
}
} else {
throw e;
}
}
reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
} catch(MalformedURLException e) {
Network.log.warn("yacyClient.queryRemoteCrawlURLs malformed target URL for peer '" + target.getName()
+ "' on address : " + ip);
} catch (final IOException e ) {
reader = null;
Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
}
if (reader != null) {
break;
}
target.put(Seed.RCOUNT, "0");
seedDB.peerActions.interfaceDeparture(target, ip);
}
target.put(Seed.RCOUNT, "0");
seedDB.peerActions.interfaceDeparture(target, ip);
} catch (IOException e) {
Network.log.warn(e);
}
final RSSFeed feed = reader == null ? null : reader.getFeed();
@ -962,13 +962,14 @@ public final class Protocol {
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
}
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 8000);
byte[] a = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL + "/yacy/search.html"), hostname, parts, false, true);
if (a != null && a.length > 200000) {
// there is something wrong. This is too large, maybe a hack on the other side?
a = null;
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 8000)) {
byte[] a = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL + "/yacy/search.html"), hostname, parts, false, true);
if (a != null && a.length > 200000) {
// there is something wrong. This is too large, maybe a hack on the other side?
a = null;
}
resultMap = FileUtils.table(a);
}
resultMap = FileUtils.table(a);
// evaluate request result
if ( resultMap == null || resultMap.isEmpty() ) {
@ -1628,25 +1629,26 @@ public final class Protocol {
}
parts.put("lurlEntry", UTF8.StringBody(crypt.simpleEncode(lurlstr, salt)));
// send request
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000);
MultiProtocolURL targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
byte[] content;
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
target.getHexHash() + ".yacyh", parts, false, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
/* Failed using https : retry with http */
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
target.getHexHash() + ".yacyh", parts, false, true);
if(content != null) {
/* Success with http : mark SSL as unavailable on the target peer */
markSSLUnavailableOnPeer(sb.peers, target, ip, "yacyClient.crawlReceipt");
}
} else {
throw e;
}
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000)) {
MultiProtocolURL targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps);
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
target.getHexHash() + ".yacyh", parts, false, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
/* Failed using https : retry with http */
targetBaseURL = target.getPublicMultiprotocolURL(ip, false);
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"),
target.getHexHash() + ".yacyh", parts, false, true);
if(content != null) {
/* Success with http : mark SSL as unavailable on the target peer */
markSSLUnavailableOnPeer(sb.peers, target, ip, "yacyClient.crawlReceipt");
}
} else {
throw e;
}
}
}
return FileUtils.table(content);
} catch (final Exception e ) {
@ -1849,23 +1851,24 @@ public final class Protocol {
parts.put("wordc", UTF8.StringBody(Integer.toString(indexes.size())));
parts.put("entryc", UTF8.StringBody(Integer.toString(indexcount)));
parts.put("indexes", UTF8.StringBody(entrypost.toString()));
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout);
byte[] content = null;
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
/* Failed with https : retry with http on the same address */
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
if(content != null) {
/* Success with http : mark SSL as unavailable on the target peer */
markSSLUnavailableOnPeer(Switchboard.getSwitchboard().peers, targetSeed, ip, "yacyClient.transferRWI");
}
} else {
throw e;
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) {
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
/* Failed with https : retry with http on the same address */
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
if(content != null) {
/* Success with http : mark SSL as unavailable on the target peer */
markSSLUnavailableOnPeer(Switchboard.getSwitchboard().peers, targetSeed, ip, "yacyClient.transferRWI");
}
} else {
throw e;
}
}
}
final Iterator<String> v = FileUtils.strings(content);
@ -1953,20 +1956,21 @@ public final class Protocol {
MultiProtocolURL targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, preferHttps);
parts.put("urlc", UTF8.StringBody(Integer.toString(urlc)));
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout);
byte[] content = null;
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
/* Failed with https : retry with http on the same address */
try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) {
try {
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} else {
throw e;
}
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} catch(final IOException e) {
if(targetBaseURL.isHTTPS()) {
targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
/* Failed with https : retry with http on the same address */
content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
} else {
throw e;
}
}
}
final Iterator<String> v = FileUtils.strings(content);
@ -1998,10 +2002,8 @@ public final class Protocol {
SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED_DEFAULT);
for (final String ip : targetSeed.getIPs()) {
try {
final Map<String, ContentBody> parts =
basicRequestParts(sb, targetSeed.hash, salt);
final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 15000);
try (final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 15000)) {
final Map<String, ContentBody> parts = basicRequestParts(sb, targetSeed.hash, salt);
MultiProtocolURL targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, preferHttps);
byte[] content;
try {

@ -897,19 +897,20 @@ public final class SeedDB implements AlternativeDomainNames {
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache, no-store"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.yacyInternetCrawlerAgent.userAgent);
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setHeader(reqHeader.entrySet());
byte[] content = null;
try {
// send request
content = client.GETbytes(seedURL, null, null, false);
} catch (final Exception e) {
throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
}
// check response code
if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) {
throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
client.setHeader(reqHeader.entrySet());
try {
// send request
content = client.GETbytes(seedURL, null, null, false);
} catch (final Exception e) {
throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
}
// check response code
if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) {
throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
}
}
try {
@ -1124,13 +1125,12 @@ public final class SeedDB implements AlternativeDomainNames {
@Override
public void run() {
// load the seed list
try {
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) {
DigestURL url = new DigestURL(seedListFileURL);
//final long start = System.currentTimeMillis();
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache, no-store");
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout);
client.setHeader(reqHeader.entrySet());
client.HEADResponse(url.toNormalform(false), false);

@ -444,10 +444,10 @@ public final class HTTPDProxyHandler {
requestHeader.remove(HeaderFramework.HOST);
final HTTPClient client = setupHttpClient(requestHeader, agent);
// send request
try {
try (final HTTPClient client = new HTTPClient(agent, timeout)) {
client.setHeader(requestHeader.entrySet());
client.setRedirecting(false);
client.GET(getUrl, false);
if (log.isFinest()) log.finest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
@ -596,20 +596,7 @@ public final class HTTPDProxyHandler {
}
} // end hasBody
} catch(final SocketException se) {
// if opened ...
// if(res != null) {
// // client cut proxy connection, abort download
// res.abort();
// }
client.close();
handleProxyException(se,conProp,respond,url);
} finally {
// if opened ...
// if(res != null) {
// // ... close connection
// res.closeStream();
// }
client.close();
}
} catch (final Exception e) {
handleProxyException(e,conProp,respond,url);
@ -759,20 +746,6 @@ public final class HTTPDProxyHandler {
return domain;
}
/**
* Creates a new HTTPClient configured for proxy use : the given request
* headers are applied to the client and automatic redirect following is
* disabled (redirects are passed back to the requesting client instead).
*
* NOTE(review): the returned client holds network resources and must be
* closed by the caller (e.g. via try-with-resources) — confirm all call
* sites do so.
*
* @param requestHeader the request headers to copy onto the new client
* @param agent the client identification agent (supplies the user-agent)
* @return a configured HTTPClient using the handler's shared timeout value
*/
private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final ClientIdentification.Agent agent) {
// setup HTTP-client
final HTTPClient client = new HTTPClient(agent, timeout);
client.setHeader(requestHeader.entrySet());
client.setRedirecting(false);
return client;
}
/**
* determines in which form the response should be send and sets header accordingly
* if the content length is not set we need to use chunked content encoding

@ -686,23 +686,13 @@ public class serverSwitch {
final String[] uris = CommonPattern.COMMA.split(uri);
for (String netdef : uris) {
netdef = netdef.trim();
try {
try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
final RequestHeader reqHeader = new RequestHeader();
reqHeader
.put(HeaderFramework.USER_AGENT,
ClientIdentification.yacyInternetCrawlerAgent.userAgent);
final HTTPClient client = new HTTPClient(
ClientIdentification.yacyInternetCrawlerAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.yacyInternetCrawlerAgent.userAgent);
client.setHeader(reqHeader.entrySet());
byte[] data = client
.GETbytes(
uri,
getConfig(
SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,
"admin"),
getConfig(
SwitchboardConstants.ADMIN_ACCOUNT_B64MD5,
""), false);
byte[] data = client.GETbytes(uri,
getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"),
getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), false);
if (data == null || data.length == 0) {
continue;
}

@ -532,9 +532,7 @@ public final class yacy {
final String adminUser = config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin");
// send 'wget' to web interface
final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
// con.setHeader(requestHeader.entrySet());
try {
try (final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
/* First get a valid transaction token using HTTP GET */
con.GETbytes("http://localhost:"+ port +"/" + path, adminUser, encodedPassword, false);
@ -600,9 +598,7 @@ public final class yacy {
if (encodedPassword == null) encodedPassword = ""; // not defined
// send 'wget' to web interface
final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
// con.setHeader(requestHeader.entrySet());
try {
try (final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
con.GETbytes("http://localhost:"+ port +"/" + path, config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,"admin"), encodedPassword, false);
if (con.getStatusCode() > 199 && con.getStatusCode() < 300) {
ConcurrentLog.config("COMMAND-STEERING", "YACY accepted steering command: " + processdescription);

Loading…
Cancel
Save