From cdf901270cb2a8d09c3673aa3b2464ba067efe56 Mon Sep 17 00:00:00 2001
From: sgaebel
Date: Sun, 31 Oct 2021 18:47:10 +0100
Subject: [PATCH] always use HTTPClient with the 'try-with-resources' pattern
 to free up resources
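The change is mechanical and the same at every call site: HTTPClient is a
Closeable resource, so each construction is wrapped in a try-with-resources
block and the hand-written close() calls and finally blocks are dropped.
A minimal sketch of the resulting call pattern (the demo class, the
example.org URL and the import paths are illustrative assumptions, not part
of this patch):

    import java.io.IOException;

    import net.yacy.cora.protocol.ClientIdentification;
    import net.yacy.cora.protocol.http.HTTPClient;
    import net.yacy.cora.util.ConcurrentLog;

    public class ClosingClientSketch {
        public static void main(final String[] args) {
            // the client is released on every exit path of this block
            try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
                client.setTimout(10000); // note: YaCy's API spells this 'setTimout'
                final byte[] body = client.GETbytes("http://example.org/", null, null, false);
                System.out.println(body == null ? "no body" : body.length + " bytes");
            } catch (final IOException e) {
                // close() has already run when we get here, so no finally block is needed
                ConcurrentLog.logException(e);
            }
        }
    }

This closes the leak that occurred whenever an exception was thrown between
constructing the client and reaching the old explicit close() call.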
---
 htroot/IndexImportMediawiki_p.java | 32 +-
 .../cora/document/id/MultiProtocolURL.java | 10 +-
 .../opensearch/OpenSearchConnector.java | 3 +-
 .../federate/opensearch/SRURSSConnector.java | 5 +-
 .../yacy/cora/federate/yacy/api/Network.java | 3 +-
 .../yacy/crawler/retrieval/HTTPLoader.java | 391 +++++++++---------
 source/net/yacy/data/WorkTables.java | 86 ++--
 .../yacy/document/parser/sitemapParser.java | 3 +-
 .../xml/opensearchdescriptionReader.java | 18 +-
 source/net/yacy/http/ProxyHandler.java | 11 +-
 source/net/yacy/peers/Protocol.java | 202 ++++-----
 source/net/yacy/peers/SeedDB.java | 28 +-
 .../yacy/server/http/HTTPDProxyHandler.java | 33 +-
 source/net/yacy/server/serverSwitch.java | 20 +-
 source/net/yacy/yacy.java | 8 +-
 15 files changed, 402 insertions(+), 451 deletions(-)

diff --git a/htroot/IndexImportMediawiki_p.java b/htroot/IndexImportMediawiki_p.java
index 0b95382d7..b45a76a5e 100644
--- a/htroot/IndexImportMediawiki_p.java
+++ b/htroot/IndexImportMediawiki_p.java
@@ -196,29 +196,29 @@ public class IndexImportMediawiki_p {
      * @return the last modified date for the file at fileURL, or 0L when unknown or when an error occurred
      */
     private static long getLastModified(MultiProtocolURL fileURL) {
-        long lastModified = 0l;
         try {
             if (fileURL.isHTTP() || fileURL.isHTTPS()) {
                 /* http(s) : we do not use MultiprotocolURL.lastModified() which always returns 0L for these protocols */
-                HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-                HttpResponse headResponse = httpClient.HEADResponse(fileURL, false);
-                if (headResponse != null && headResponse.getStatusLine() != null
-                        && headResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
-                    Header lastModifiedHeader = headResponse
-                            .getFirstHeader(HeaderFramework.LAST_MODIFIED);
-                    if (lastModifiedHeader != null) {
-                        Date lastModifiedDate = HeaderFramework.parseHTTPDate(lastModifiedHeader.getValue());
-                        if(lastModifiedDate != null) {
-                            lastModified = lastModifiedDate.getTime();
-                        }
-                    }
-                }
+                try (HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
+                    HttpResponse headResponse = httpClient.HEADResponse(fileURL, false);
+                    if (headResponse != null && headResponse.getStatusLine() != null
+                            && headResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
+                        Header lastModifiedHeader = headResponse
+                                .getFirstHeader(HeaderFramework.LAST_MODIFIED);
+                        if (lastModifiedHeader != null) {
+                            Date lastModifiedDate = HeaderFramework.parseHTTPDate(lastModifiedHeader.getValue());
+                            if(lastModifiedDate != null) {
+                                return lastModifiedDate.getTime();
+                            }
+                        }
+                    }
+                }
             } else {
-                lastModified = fileURL.lastModified();
+                return fileURL.lastModified();
             }
         } catch (IOException ignored) {
            ConcurrentLog.warn("IndexImportMediawiki_p", "Could not retrieve last modified date for dump file at " + fileURL);
         }
-        return lastModified;
+        return 0l;
     }
 }

diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java
index 7f4bf9a2c..92ab41a52 100644
--- a/source/net/yacy/cora/document/id/MultiProtocolURL.java
+++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java
@@ -2538,7 +2538,7 @@ public class MultiProtocolURL implements Serializable, Comparable
 299 && statusCode < 310) {
-            client.close();
-
-            final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline, responseHeader, requestURLString);
-
-            if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
-                // we have two use cases here: loading from a crawl or just
-                // loading the url. Check this:
-                if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
-                    // put redirect url on the crawler queue to repeat a
-                    // double-check
-                    /* We have to clone the request instance and not to modify directly its URL,
-                     * otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
-                    Request redirectedRequest = new Request(request.initiator(),
-                            redirectionUrl,
-                            request.referrerhash(),
-                            request.name(),
-                            request.appdate(),
-                            request.profileHandle(),
-                            request.depth(),
-                            request.timezoneOffset());
-                    String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
-                    if(rejectReason != null) {
-                        throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
+        try (final HTTPClient client = new HTTPClient(agent)) {
+            client.setRedirecting(false); // we want to handle redirection
+                                          // ourselves, so we don't index pages
+                                          // twice
+            client.setTimout(this.socketTimeout);
+            client.setHeader(requestHeader.entrySet());
+
+            // send request
+            client.GET(url, false);
+            final StatusLine statusline = client.getHttpResponse().getStatusLine();
+            final int statusCode = statusline.getStatusCode();
+            final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
+            String requestURLString = request.url().toNormalform(true);
+
+            // check redirection
+            if (statusCode > 299 && statusCode < 310) {
+                client.close();
+
+                final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline, responseHeader, requestURLString);
+
+                if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
+                    // we have two use cases here: loading from a crawl or just
+                    // loading the url. Check this:
+                    if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
+                        // put redirect url on the crawler queue to repeat a
+                        // double-check
+                        /* We have to clone the request instance and not to modify directly its URL,
+                         * otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
+                        Request redirectedRequest = new Request(request.initiator(),
+                                redirectionUrl,
+                                request.referrerhash(),
+                                request.name(),
+                                request.appdate(),
+                                request.profileHandle(),
+                                request.depth(),
+                                request.timezoneOffset());
+                        String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
+                        if(rejectReason != null) {
+                            throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
Reason : " + rejectReason); + } + // in the end we must throw an exception (even if this is + // not an error, just to abort the current process + throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check"); } - // in the end we must throw an exception (even if this is - // not an error, just to abort the current process - throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " - + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check"); - } - - // if we are already doing a shutdown we don't need to retry - // crawling - if (Thread.currentThread().isInterrupted()) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, - FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); - throw new IOException( - "CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$"); - } - - // check if the redirected URL is the same as the requested URL - // this shortcuts a time-out using retryCount - if (redirectionUrl.equals(url)) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirect to same url", -1); - throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$"); - } - - // retry crawling with new url - request.redirectURL(redirectionUrl); - return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent); - } - // we don't want to follow redirects - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); - throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$"); - } else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) { - // the transfer is ok - - /* - * When content is not large (less than Response.CRAWLER_MAX_SIZE_TO_CACHE), we have better cache it if cache is enabled and url is not local - */ - long contentLength = client.getHttpResponse().getEntity().getContentLength(); - InputStream contentStream; - if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) { - byte[] content = null; - try { - content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize); - Cache.store(url, responseHeader, content); - } catch (final IOException e) { - this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e); - } finally { - client.close(); - } - - contentStream = new ByteArrayInputStream(content); - } else { - /* - * Content length may already be known now : check it before opening a stream - */ - if (maxFileSize >= 0 && contentLength > maxFileSize) { - throw new IOException("Content to download exceed maximum value of " + maxFileSize + " bytes"); + + // if we are already doing a shutdown we don't need to retry + // crawling + if (Thread.currentThread().isInterrupted()) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); + throw new IOException( + "CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$"); + } + + // check if the redirected URL is the same as the requested URL + // this shortcuts a time-out using 
+                    if (redirectionUrl.equals(url)) {
+                        this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirect to same url", -1);
+                        throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
+                    }
+
+                    // retry crawling with new url
+                    request.redirectURL(redirectionUrl);
+                    return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
                 }
+                // we don't want to follow redirects
+                this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+                throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$");
+            } else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) {
+                // the transfer is ok
+
+                /*
+                 * When content is not large (less than Response.CRAWLER_MAX_SIZE_TO_CACHE), we have better cache it if cache is enabled and url is not local
+                 */
+                long contentLength = client.getHttpResponse().getEntity().getContentLength();
+                InputStream contentStream;
+                if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) {
+                    byte[] content = null;
+                    try {
+                        content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
+                        Cache.store(url, responseHeader, content);
+                    } catch (final IOException e) {
+                        this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e);
+                    } finally {
+                        client.close();
+                    }
+
+                    contentStream = new ByteArrayInputStream(content);
+                } else {
+                    /*
+                     * Content length may already be known now : check it before opening a stream
+                     */
+                    if (maxFileSize >= 0 && contentLength > maxFileSize) {
+                        throw new IOException("Content to download exceed maximum value of " + maxFileSize + " bytes");
+                    }
+                    /*
+                     * Create a HTTPInputStream delegating to
+                     * client.getContentstream(). Close method will ensure client is
+                     * properly closed.
+                     */
+                    contentStream = new HTTPInputStream(client);
+                    /* Anticipated content length may not be already known or incorrect : let's apply now the same eventual content size restriction as when loading in a byte array */
+                    if(maxFileSize >= 0) {
+                        contentStream = new StrictLimitInputStream(contentStream, maxFileSize,
+                                "Content to download exceed maximum value of " + Formatter.bytesToString(maxFileSize));
+                    }
                 }
+
+                return new StreamResponse(new Response(request, requestHeader, responseHeader, profile, false, null), contentStream);
+            } else {
+                client.close();
+                // if the response has not the right response type then reject file
+                this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
+                        FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
+                throw new IOException("REJECTED WRONG STATUS TYPE '" + statusline
+                        + "' for URL '" + requestURLString + "'$");
             }
-
-            return new StreamResponse(new Response(request, requestHeader, responseHeader, profile, false, null), contentStream);
-        } else {
-            client.close();
-            // if the response has not the right response type then reject file
-            this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
-                    FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
-            throw new IOException("REJECTED WRONG STATUS TYPE '" + statusline
-                    + "' for URL '" + requestURLString + "'$");
         }
     }
@@ -364,90 +365,91 @@ public final class HTTPLoader {
         final RequestHeader requestHeader = createRequestheader(request, agent);

         // HTTP-Client
-        final HTTPClient client = new HTTPClient(agent);
-        client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
-        client.setTimout(this.socketTimeout);
-        client.setHeader(requestHeader.entrySet());
-
-        // send request
-        final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize, false);
-        final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
-        final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
-        String requestURLString = request.url().toNormalform(true);
-
-        // check redirection
-        if (statusCode > 299 && statusCode < 310) {
-
-            final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client.getHttpResponse().getStatusLine(),
-                    responseHeader, requestURLString);
-
-            if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
-                // we have two use cases here: loading from a crawl or just loading the url. Check this:
-                if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
-                    // put redirect url on the crawler queue to repeat a double-check
-                    /* We have to clone the request instance and not to modify directly its URL,
-                     * otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */
-                    Request redirectedRequest = new Request(request.initiator(),
-                            redirectionUrl,
-                            request.referrerhash(),
-                            request.name(),
-                            request.appdate(),
-                            request.profileHandle(),
-                            request.depth(),
-                            request.timezoneOffset());
-                    String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest);
-                    // in the end we must throw an exception (even if this is not an error, just to abort the current process
-                    if(rejectReason != null) {
-                        throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
Reason : " + rejectReason); + try (final HTTPClient client = new HTTPClient(agent)) { + client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice + client.setTimout(this.socketTimeout); + client.setHeader(requestHeader.entrySet()); + + // send request + final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize, false); + final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); + final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); + String requestURLString = request.url().toNormalform(true); + + // check redirection + if (statusCode > 299 && statusCode < 310) { + + final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client.getHttpResponse().getStatusLine(), + responseHeader, requestURLString); + + if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) { + // we have two use cases here: loading from a crawl or just loading the url. Check this: + if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) { + // put redirect url on the crawler queue to repeat a double-check + /* We have to clone the request instance and not to modify directly its URL, + * otherwise the stackCrawl() function would reject it, because detecting it as already in the activeWorkerEntries */ + Request redirectedRequest = new Request(request.initiator(), + redirectionUrl, + request.referrerhash(), + request.name(), + request.appdate(), + request.profileHandle(), + request.depth(), + request.timezoneOffset()); + String rejectReason = this.sb.crawlStacker.stackCrawl(redirectedRequest); + // in the end we must throw an exception (even if this is not an error, just to abort the current process + if(rejectReason != null) { + throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. 
Reason : " + rejectReason); + } + throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check"); } - throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check"); + + // if we are already doing a shutdown we don't need to retry crawling + if (Thread.currentThread().isInterrupted()) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); + throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$"); + } + + // retry crawling with new url + request.redirectURL(redirectionUrl); + return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent); } - - // if we are already doing a shutdown we don't need to retry crawling - if (Thread.currentThread().isInterrupted()) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); - throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$"); + // we don't want to follow redirects + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); + throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); + } else if (responseBody == null) { + // no response, reject file + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode); + throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); + } else if (statusCode == 200 || statusCode == 203) { + // the transfer is ok + + // we write the new cache entry to file system directly + final long contentLength = responseBody.length; + ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength); + + // check length again in case it was not possible to get the length before loading + if (maxFileSize >= 0 && contentLength > maxFileSize) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode); + throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. 
(GET)$"); } - - // retry crawling with new url - request.redirectURL(redirectionUrl); - return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent); - } - // we don't want to follow redirects - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); - throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); - } else if (responseBody == null) { - // no response, reject file - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode); - throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); - } else if (statusCode == 200 || statusCode == 203) { - // the transfer is ok - - // we write the new cache entry to file system directly - final long contentLength = responseBody.length; - ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength); - - // check length again in case it was not possible to get the length before loading - if (maxFileSize >= 0 && contentLength > maxFileSize) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode); - throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)$"); + + // create a new cache entry + response = new Response( + request, + requestHeader, + responseHeader, + profile, + false, + responseBody + ); + + return response; + } else { + // if the response has not the right response type then reject file + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode); + throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); } - - // create a new cache entry - response = new Response( - request, - requestHeader, - responseHeader, - profile, - false, - responseBody - ); - - return response; - } else { - // if the response has not the right response type then reject file - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode); - throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); } } @@ -484,9 +486,9 @@ public final class HTTPLoader { requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING); - final HTTPClient client = new HTTPClient(agent); - client.setTimout(20000); - client.setHeader(requestHeader.entrySet()); + try (final HTTPClient client = new HTTPClient(agent)) { + client.setTimout(20000); + client.setHeader(requestHeader.entrySet()); final byte[] responseBody = client.GETbytes(request.url(), null, null, false); final int code = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader header = new ResponseHeader(code, client.getHttpResponse().getAllHeaders()); @@ -539,6 +541,7 @@ public final class HTTPLoader { // if the response has not the right response type then reject file throw new IOException("REJECTED WRONG STATUS TYPE 
'" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString()); } + } return response; } diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java index 679c33057..0af5baf55 100644 --- a/source/net/yacy/data/WorkTables.java +++ b/source/net/yacy/data/WorkTables.java @@ -327,50 +327,53 @@ public class WorkTables extends Tables { * @return a map of the called urls and the http status code of the api call or -1 if any other IOException occurred */ public Map execAPICalls(String host, int port, Collection pks, final String username, final String pass) { - // now call the api URLs and store the result status - final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); - client.setTimout(120000); - Tables.Row row; LinkedHashMap l = new LinkedHashMap(); - for (final String pk: pks) { - row = null; - try { - row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk)); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } catch (final SpaceExceededException e) { - ConcurrentLog.logException(e); - } - if (row == null) continue; - String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK()); - try { - MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall); - final Map attributes = url.getAttributes(); - final boolean isTokenProtectedAPI = attributes.containsKey(TransactionManager.TRANSACTION_TOKEN_PARAM); - // use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes() - if (theapicall.length() > 1000 || isTokenProtectedAPI) { - // use a POST to execute the call - execPostAPICall(host, port, username, pass, client, l, url, isTokenProtectedAPI); - } else { - // use a GET to execute the call - ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true)); - try { - client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path% - if(client.getStatusCode() == HttpStatus.SC_METHOD_NOT_ALLOWED) { - /* GET method not allowed (HTTP 450 status) : this may be an old API entry, - * now restricted to HTTP POST and requiring a transaction token. We try now with POST. 
-                            execPostAPICall(host, port, username, pass, client, l, url, true);
-                        } else {
-                            l.put(url.toNormalform(true), client.getStatusCode());
+        // now call the api URLs and store the result status
+        try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
+            client.setTimout(120000);
+            Tables.Row row;
+            for (final String pk: pks) {
+                row = null;
+                try {
+                    row = select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
+                } catch (final IOException e) {
+                    ConcurrentLog.logException(e);
+                } catch (final SpaceExceededException e) {
+                    ConcurrentLog.logException(e);
+                }
+                if (row == null) continue;
+                String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
+                try {
+                    MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
+                    final Map<String, String> attributes = url.getAttributes();
+                    final boolean isTokenProtectedAPI = attributes.containsKey(TransactionManager.TRANSACTION_TOKEN_PARAM);
+                    // use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
+                    if (theapicall.length() > 1000 || isTokenProtectedAPI) {
+                        // use a POST to execute the call
+                        execPostAPICall(host, port, username, pass, client, l, url, isTokenProtectedAPI);
+                    } else {
+                        // use a GET to execute the call
+                        ConcurrentLog.info("WorkTables", "executing url: " + url.toNormalform(true));
+                        try {
+                            client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
+                            if(client.getStatusCode() == HttpStatus.SC_METHOD_NOT_ALLOWED) {
+                                /* GET method not allowed (HTTP 450 status) : this may be an old API entry,
+                                 * now restricted to HTTP POST and requiring a transaction token. We try now with POST. */
+                                execPostAPICall(host, port, username, pass, client, l, url, true);
+                            } else {
+                                l.put(url.toNormalform(true), client.getStatusCode());
+                            }
+                        } catch (final IOException e) {
+                            ConcurrentLog.logException(e);
+                            l.put(url.toString(), -1);
                         }
-                    } catch (final IOException e) {
-                        ConcurrentLog.logException(e);
-                        l.put(url.toString(), -1);
                     }
+                } catch (MalformedURLException ex) {
+                    ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
                 }
-            } catch (MalformedURLException ex) {
-                ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
             }
+        } catch (IOException e) {
+            ConcurrentLog.logException(e);
         }
         return l;
     }
@@ -447,11 +450,10 @@ public class WorkTables extends Tables {
      */
     public static int execGetAPICall(String host, int port, String path, byte[] pk, final String username, final String pass) {
         // now call the api URLs and store the result status
-        final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-        client.setTimout(120000);
         String url = "http://" + host + ":" + port + path;
         if (pk != null) url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(pk);
-        try {
+        try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
+            client.setTimout(120000);
             client.GETbytes(url, username, pass, false);
             return client.getStatusCode();
         } catch (final IOException e) {

diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java
index 5486635c4..be52f72e7 100644
--- a/source/net/yacy/document/parser/sitemapParser.java
+++ b/source/net/yacy/document/parser/sitemapParser.java
@@ -114,9 +114,8 @@ public class sitemapParser extends AbstractParser implements Parser {
     public static SitemapReader parse(final DigestURL sitemapURL, final ClientIdentification.Agent agent) throws IOException {
         // download document
         ConcurrentLog.info("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
-        final HTTPClient client = new HTTPClient(agent);
         // client.setHeader(requestHeader.entrySet());
-        try {
+        try (final HTTPClient client = new HTTPClient(agent)) {
             client.GET(sitemapURL.toNormalform(false), false);
             if (client.getStatusCode() != 200) {
                 throw new IOException("Unable to download the sitemap file " + sitemapURL +

diff --git a/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java b/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java
index 54bc8dbf9..37866ebc5 100644
--- a/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java
+++ b/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java
@@ -147,19 +147,12 @@ public class opensearchdescriptionReader extends DefaultHandler {
     public opensearchdescriptionReader(final String path, final ClientIdentification.Agent agent) {
         this();
         this.agent = agent;
-        HTTPClient www = new HTTPClient(agent);
-        try {
+        try (HTTPClient www = new HTTPClient(agent)) {
             www.GET(path, false);
             final SAXParser saxParser = getParser();
             saxParser.parse(www.getContentstream(), this);
         } catch (final Exception e) {
             ConcurrentLog.logException(e);
-        } finally {
-            try {
-                www.close();
-            } catch (final IOException e) {
-                ConcurrentLog.logException(e);
-            }
         }
     }

@@ -170,8 +163,7 @@ public class opensearchdescriptionReader extends DefaultHandler {
         this.parsingTextValue = false;
         this.rssurl = null;
         this.atomurl = null;
-        HTTPClient www = new HTTPClient(this.agent);
-        try {
+        try (HTTPClient www = new HTTPClient(this.agent)) {
             www.GET(path, false);
             final SAXParser saxParser = getParser();
             try {
@@ -185,12 +177,6 @@ public class opensearchdescriptionReader extends DefaultHandler {
         } catch (final Exception e) {
             ConcurrentLog.warn("opensearchdescriptionReader", "parse exception: " + e);
             return false;
-        } finally {
-            try {
-                www.close();
-            } catch (final IOException e) {
-                ConcurrentLog.logException(e);
-            }
         }
     }

diff --git a/source/net/yacy/http/ProxyHandler.java b/source/net/yacy/http/ProxyHandler.java
index 5e1b482f2..2892c0039 100644
--- a/source/net/yacy/http/ProxyHandler.java
+++ b/source/net/yacy/http/ProxyHandler.java
@@ -132,12 +132,11 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
         RequestHeader proxyHeaders = ProxyHandler.convertHeaderFromJetty(request);
         setProxyHeaderForClient(request, proxyHeaders);

-        final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent);
-        client.setTimout(timeout);
-        client.setHeader(proxyHeaders.entrySet());
-        client.setRedirecting(false);
         // send request
-        try {
+        try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent)) {
+            client.setTimout(timeout);
+            client.setHeader(proxyHeaders.entrySet());
+            client.setRedirecting(false);
             String queryString = request.getQueryString() != null ? "?" + request.getQueryString() : "";
             DigestURL digestURI = new DigestURL(request.getScheme(), request.getServerName(), request.getServerPort(), request.getRequestURI() + queryString);
             if (request.getMethod().equals(HeaderFramework.METHOD_GET)) {
@@ -219,8 +218,6 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
             }
         } catch (final SocketException se) {
             throw new ServletException("Socket Exception: " + se.getMessage());
-        } finally {
-            client.close();
         }

         // we handled this request, break out of handler chain

diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java
index 4260da7bf..5611b033e 100644
--- a/source/net/yacy/peers/Protocol.java
+++ b/source/net/yacy/peers/Protocol.java
@@ -159,11 +159,11 @@ public final class Protocol {
         final String path,
         final Map<String, ContentBody> parts,
         final int timeout) throws IOException {
-        final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-        httpClient.setTimout(timeout);
-        MultiProtocolURL targetURL = new MultiProtocolURL(targetBaseURL, path);
-        this.result = httpClient.POSTbytes(targetURL, Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, false,
-            true);
+        try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
+            httpClient.setTimout(timeout);
+            MultiProtocolURL targetURL = new MultiProtocolURL(targetBaseURL, path);
+            this.result = httpClient.POSTbytes(targetURL, Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, false, true);
+        }
     }

@@ -197,19 +197,16 @@ public final class Protocol {
         final String salt = crypt.randomSalt();
         long responseTime = Long.MAX_VALUE;
         byte[] content = null;
-        try {
+        try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 30000)) {
             // generate request
-            final Map<String, ContentBody> parts =
-                basicRequestParts(Switchboard.getSwitchboard(), null, salt);
+            final Map<String, ContentBody> parts = basicRequestParts(Switchboard.getSwitchboard(), null, salt);
             parts.put("count", UTF8.StringBody("20"));
             parts.put("magic", UTF8.StringBody(Long.toString(Network.magic)));
             parts.put("seed", UTF8.StringBody(mySeed.genSeedStr(salt)));
             // send request
             final long start = System.currentTimeMillis();
             // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 30000); - content = - httpClient.POSTbytes( + content = httpClient.POSTbytes( new MultiProtocolURL(targetBaseURL, "/yacy/hello.html"), Seed.b64Hash2hexHash(targetHash) + ".yacyh", parts, @@ -433,41 +430,44 @@ public final class Protocol { parts.put("count", UTF8.StringBody(Integer.toString(maxCount))); parts.put("time", UTF8.StringBody(Long.toString(maxTime))); // final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, (int) maxTime); RSSReader reader = null; - for (final String ip: target.getIPs()) { - MultiProtocolURL targetBaseURL = null; - try { - targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps); - byte[] result; - try { - result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true); - } catch(final IOException e) { - if(targetBaseURL.isHTTPS()) { - /* Failed with https : retry with http */ - targetBaseURL = target.getPublicMultiprotocolURL(ip, false); - result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true); - if(result != null) { - /* Got something with http : mark peer SSL as unavailable on target peer */ - markSSLUnavailableOnPeer(seedDB, target, ip, "yacyClient.queryRemoteCrawlURLs"); - } - } else { - throw e; - } - } - reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); - } catch(MalformedURLException e) { - Network.log.warn("yacyClient.queryRemoteCrawlURLs malformed target URL for peer '" + target.getName() - + "' on address : " + ip); - } catch (final IOException e ) { - reader = null; - Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null"); - } - if (reader != null) { - break; + try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, (int) maxTime)) { + for (final String ip: target.getIPs()) { + MultiProtocolURL targetBaseURL = null; + try { + targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps); + byte[] result; + try { + result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true); + } catch(final IOException e) { + if(targetBaseURL.isHTTPS()) { + /* Failed with https : retry with http */ + targetBaseURL = target.getPublicMultiprotocolURL(ip, false); + result = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true); + if(result != null) { + /* Got something with http : mark peer SSL as unavailable on target peer */ + markSSLUnavailableOnPeer(seedDB, target, ip, "yacyClient.queryRemoteCrawlURLs"); + } + } else { + throw e; + } + } + reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); + } catch(MalformedURLException e) { + Network.log.warn("yacyClient.queryRemoteCrawlURLs malformed target URL for peer '" + target.getName() + + "' on address : " + ip); + } catch (final IOException e ) { + reader = null; + 
Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null"); + } + if (reader != null) { + break; + } + target.put(Seed.RCOUNT, "0"); + seedDB.peerActions.interfaceDeparture(target, ip); } - target.put(Seed.RCOUNT, "0"); - seedDB.peerActions.interfaceDeparture(target, ip); + } catch (IOException e) { + Network.log.warn(e); } final RSSFeed feed = reader == null ? null : reader.getFeed(); @@ -962,13 +962,14 @@ public final class Protocol { //resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts)); } - final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 8000); - byte[] a = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL + "/yacy/search.html"), hostname, parts, false, true); - if (a != null && a.length > 200000) { - // there is something wrong. This is too large, maybe a hack on the other side? - a = null; + try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 8000)) { + byte[] a = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL + "/yacy/search.html"), hostname, parts, false, true); + if (a != null && a.length > 200000) { + // there is something wrong. This is too large, maybe a hack on the other side? + a = null; + } + resultMap = FileUtils.table(a); } - resultMap = FileUtils.table(a); // evaluate request result if ( resultMap == null || resultMap.isEmpty() ) { @@ -1628,25 +1629,26 @@ public final class Protocol { } parts.put("lurlEntry", UTF8.StringBody(crypt.simpleEncode(lurlstr, salt))); // send request - final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000); - MultiProtocolURL targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps); byte[] content; - try { - content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"), - target.getHexHash() + ".yacyh", parts, false, true); - } catch(final IOException e) { - if(targetBaseURL.isHTTPS()) { - /* Failed using https : retry with http */ - targetBaseURL = target.getPublicMultiprotocolURL(ip, false); - content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"), - target.getHexHash() + ".yacyh", parts, false, true); - if(content != null) { - /* Success with http : mark SSL as unavailable on the target peer */ - markSSLUnavailableOnPeer(sb.peers, target, ip, "yacyClient.crawlReceipt"); - } - } else { - throw e; - } + try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 10000)) { + MultiProtocolURL targetBaseURL = target.getPublicMultiprotocolURL(ip, preferHttps); + try { + content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"), + target.getHexHash() + ".yacyh", parts, false, true); + } catch(final IOException e) { + if(targetBaseURL.isHTTPS()) { + /* Failed using https : retry with http */ + targetBaseURL = target.getPublicMultiprotocolURL(ip, false); + content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/crawlReceipt.html"), + target.getHexHash() + ".yacyh", parts, false, true); + if(content != null) { + /* Success with http : mark SSL as unavailable on the target peer */ + markSSLUnavailableOnPeer(sb.peers, target, ip, "yacyClient.crawlReceipt"); + } + } else { + 
+                        throw e;
+                    }
+                }
+            }
             return FileUtils.table(content);
         } catch (final Exception e ) {
@@ -1849,23 +1851,24 @@ public final class Protocol {
         parts.put("wordc", UTF8.StringBody(Integer.toString(indexes.size())));
         parts.put("entryc", UTF8.StringBody(Integer.toString(indexcount)));
         parts.put("indexes", UTF8.StringBody(entrypost.toString()));
-        final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout);
         byte[] content = null;
-        try {
-            content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
-                    targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
-        } catch(final IOException e) {
-            if(targetBaseURL.isHTTPS()) {
-                targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
-                /* Failed with https : retry with http on the same address */
-                content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
-                        targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
-                if(content != null) {
-                    /* Success with http : mark SSL as unavailable on the target peer */
-                    markSSLUnavailableOnPeer(Switchboard.getSwitchboard().peers, targetSeed, ip, "yacyClient.transferRWI");
-                }
-            } else {
-                throw e;
+        try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) {
+            try {
+                content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
+                        targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
+            } catch(final IOException e) {
+                if(targetBaseURL.isHTTPS()) {
+                    targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
+                    /* Failed with https : retry with http on the same address */
+                    content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferRWI.html"),
+                            targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
+                    if(content != null) {
+                        /* Success with http : mark SSL as unavailable on the target peer */
+                        markSSLUnavailableOnPeer(Switchboard.getSwitchboard().peers, targetSeed, ip, "yacyClient.transferRWI");
+                    }
+                } else {
+                    throw e;
+                }
+            }
         }
         final Iterator<String> v = FileUtils.strings(content);
@@ -1953,20 +1956,21 @@ public final class Protocol {
         MultiProtocolURL targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, preferHttps);

         parts.put("urlc", UTF8.StringBody(Integer.toString(urlc)));
-        final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout);
         byte[] content = null;
-        try {
-            content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
-                    targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
-        } catch(final IOException e) {
-            if(targetBaseURL.isHTTPS()) {
-                targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
-                /* Failed with https : retry with http on the same address */
-                content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
-                        targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
-            } else {
-                throw e;
-            }
+        try (final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) {
+            try {
+                content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
+                        targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
+            } catch(final IOException e) {
+                if(targetBaseURL.isHTTPS()) {
+                    targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, false);
+                    /* Failed with https : retry with http on the same address */
+                    content = httpClient.POSTbytes(new MultiProtocolURL(targetBaseURL, "/yacy/transferURL.html"),
+                            targetSeed.getHexHash() + ".yacyh", parts, gzipBody, true);
+                } else {
+                    throw e;
+                }
+            }
+        }

         final Iterator<String> v = FileUtils.strings(content);
@@ -1998,10 +2002,8 @@ public final class Protocol {
                 SwitchboardConstants.NETWORK_PROTOCOL_HTTPS_PREFERRED_DEFAULT);

         for (final String ip : targetSeed.getIPs()) {
-            try {
-                final Map<String, ContentBody> parts =
-                    basicRequestParts(sb, targetSeed.hash, salt);
-                final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 15000);
+            try (final HTTPClient httpclient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 15000)) {
+                final Map<String, ContentBody> parts = basicRequestParts(sb, targetSeed.hash, salt);
                 MultiProtocolURL targetBaseURL = targetSeed.getPublicMultiprotocolURL(ip, preferHttps);
                 byte[] content;
                 try {

diff --git a/source/net/yacy/peers/SeedDB.java b/source/net/yacy/peers/SeedDB.java
index 8244a2e2a..ca0cd0a35 100644
--- a/source/net/yacy/peers/SeedDB.java
+++ b/source/net/yacy/peers/SeedDB.java
@@ -897,19 +897,20 @@ public final class SeedDB implements AlternativeDomainNames {
         reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache, no-store"); // httpc uses HTTP/1.0 is this necessary?
         reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.yacyInternetCrawlerAgent.userAgent);
-        final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-        client.setHeader(reqHeader.entrySet());
         byte[] content = null;
-        try {
-            // send request
-            content = client.GETbytes(seedURL, null, null, false);
-        } catch (final Exception e) {
-            throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
-        }
-
-        // check response code
-        if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) {
-            throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
+        try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
+            client.setHeader(reqHeader.entrySet());
+            try {
+                // send request
+                content = client.GETbytes(seedURL, null, null, false);
+            } catch (final Exception e) {
+                throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
" + e.getMessage()); + } + + // check response code + if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) { + throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine()); + } } try { @@ -1124,13 +1125,12 @@ public final class SeedDB implements AlternativeDomainNames { @Override public void run() { // load the seed list - try { + try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout)) { DigestURL url = new DigestURL(seedListFileURL); //final long start = System.currentTimeMillis(); final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache, no-store"); - final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout); client.setHeader(reqHeader.entrySet()); client.HEADResponse(url.toNormalform(false), false); diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java index 2d7ab3ebd..493cd7a6f 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -444,10 +444,10 @@ public final class HTTPDProxyHandler { requestHeader.remove(HeaderFramework.HOST); - final HTTPClient client = setupHttpClient(requestHeader, agent); - // send request - try { + try (final HTTPClient client = new HTTPClient(agent, timeout)) { + client.setHeader(requestHeader.entrySet()); + client.setRedirecting(false); client.GET(getUrl, false); if (log.isFinest()) log.finest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); @@ -596,20 +596,7 @@ public final class HTTPDProxyHandler { } } // end hasBody } catch(final SocketException se) { - // if opened ... -// if(res != null) { -// // client cut proxy connection, abort download -// res.abort(); -// } - client.close(); handleProxyException(se,conProp,respond,url); - } finally { - // if opened ... -// if(res != null) { -// // ... 
-//                res.closeStream();
-//            }
-            client.close();
         }
         } catch (final Exception e) {
             handleProxyException(e,conProp,respond,url);
@@ -759,20 +746,6 @@ public final class HTTPDProxyHandler {
         return domain;
     }

-    /**
-     * creates a new HttpClient and sets parameters according to proxy needs
-     *
-     * @param requestHeader
-     * @return
-     */
-    private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final ClientIdentification.Agent agent) {
-        // setup HTTP-client
-        final HTTPClient client = new HTTPClient(agent, timeout);
-        client.setHeader(requestHeader.entrySet());
-        client.setRedirecting(false);
-        return client;
-    }
-
     /**
      * determines in which form the response should be send and sets header accordingly
      * if the content length is not set we need to use chunked content encoding

diff --git a/source/net/yacy/server/serverSwitch.java b/source/net/yacy/server/serverSwitch.java
index 746a5900d..ab615e581 100644
--- a/source/net/yacy/server/serverSwitch.java
+++ b/source/net/yacy/server/serverSwitch.java
@@ -686,23 +686,13 @@ public class serverSwitch {
         final String[] uris = CommonPattern.COMMA.split(uri);
         for (String netdef : uris) {
             netdef = netdef.trim();
-            try {
+            try (final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
                 final RequestHeader reqHeader = new RequestHeader();
-                reqHeader
-                        .put(HeaderFramework.USER_AGENT,
-                                ClientIdentification.yacyInternetCrawlerAgent.userAgent);
-                final HTTPClient client = new HTTPClient(
-                        ClientIdentification.yacyInternetCrawlerAgent);
+                reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.yacyInternetCrawlerAgent.userAgent);
                 client.setHeader(reqHeader.entrySet());
-                byte[] data = client
-                        .GETbytes(
-                                uri,
-                                getConfig(
-                                        SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,
-                                        "admin"),
-                                getConfig(
-                                        SwitchboardConstants.ADMIN_ACCOUNT_B64MD5,
-                                        ""), false);
+                byte[] data = client.GETbytes(uri,
+                        getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"),
+                        getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), false);
                 if (data == null || data.length == 0) {
                     continue;
                 }

diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java
index 89499ef02..4ed9b59ce 100644
--- a/source/net/yacy/yacy.java
+++ b/source/net/yacy/yacy.java
@@ -532,9 +532,7 @@ public final class yacy {
         final String adminUser = config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin");

         // send 'wget' to web interface
-        final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-        // con.setHeader(requestHeader.entrySet());
-        try {
+        try (final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
             /* First get a valid transaction token using HTTP GET */
             con.GETbytes("http://localhost:"+ port +"/" + path, adminUser, encodedPassword, false);

@@ -600,9 +598,7 @@ public final class yacy {
         if (encodedPassword == null) encodedPassword = ""; // not defined

         // send 'wget' to web interface
-        final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
-        // con.setHeader(requestHeader.entrySet());
-        try {
+        try (final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent)) {
             con.GETbytes("http://localhost:"+ port +"/" + path, config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,"admin"), encodedPassword, false);
             if (con.getStatusCode() > 199 && con.getStatusCode() < 300) {
                 ConcurrentLog.config("COMMAND-STEERING", "YACY accepted steering command: " + processdescription);