From 022c6d3ce1d7c872f837d9de99e3590220733f55 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 19 Jan 2014 15:21:23 +0100 Subject: [PATCH] do YaCy p2p connections using a timeout-request which wraps the http request in a separate thread and ignores the further result of a request if it does not answer within the requested time-out. This is an attempt to solve a problem with the peer-ping, which hangs whenever a peer appears to be dead or blocked. --- .../contentcontrol/SMWListSyncThread.java | 2 +- .../cora/document/id/MultiProtocolURL.java | 4 +- .../federate/opensearch/SRURSSConnector.java | 2 +- .../yacy/cora/federate/yacy/api/Network.java | 2 +- .../yacy/cora/protocol/http/HTTPClient.java | 96 ++++++++++++------- .../yacy/crawler/retrieval/HTTPLoader.java | 4 +- source/net/yacy/data/WorkTables.java | 4 +- .../yacy/document/parser/sitemapParser.java | 2 +- .../xml/opensearchdescriptionReader.java | 4 +- source/net/yacy/http/ProxyHandler.java | 7 +- source/net/yacy/interaction/Interaction.java | 2 +- source/net/yacy/peers/Protocol.java | 16 ++-- source/net/yacy/peers/SeedDB.java | 2 +- .../net/yacy/peers/operation/yacyRelease.java | 4 +- source/net/yacy/search/Switchboard.java | 4 +- .../yacy/server/http/HTTPDProxyHandler.java | 2 +- source/net/yacy/server/serverSwitch.java | 2 +- source/net/yacy/yacy.java | 5 +- 18 files changed, 92 insertions(+), 72 deletions(-) diff --git a/source/net/yacy/contentcontrol/SMWListSyncThread.java b/source/net/yacy/contentcontrol/SMWListSyncThread.java index 6d611f14a..ff60ee7e0 100644 --- a/source/net/yacy/contentcontrol/SMWListSyncThread.java +++ b/source/net/yacy/contentcontrol/SMWListSyncThread.java @@ -79,7 +79,7 @@ public class SMWListSyncThread { + "/limit%3D200000" + "/format%3Dystat"); - String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null)); + String reply = UTF8.String(new 
HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false)); String overallcount = reply.split(",")[0]; String lastsyncstring = reply.split(",")[1]; this.currentmax = Integer.parseInt(overallcount); diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index eb3dd1068..3d804caa3 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -2059,7 +2059,7 @@ public class MultiProtocolURL implements Serializable, Comparable parts, final boolean usegzip) throws IOException { + public byte[] POSTbytes(final String uri, final Map parts, final boolean usegzip, final boolean concurrent) throws IOException { final MultiProtocolURL url = new MultiProtocolURL(uri); - return POSTbytes(url, url.getHost(), parts, usegzip); + return POSTbytes(url, url.getHost(), parts, usegzip, concurrent); } /** @@ -504,7 +509,7 @@ public class HTTPClient { * @return response body * @throws IOException */ - public byte[] POSTbytes(final MultiProtocolURL url, final String vhost, final Map post, final boolean usegzip) throws IOException { + public byte[] POSTbytes(final MultiProtocolURL url, final String vhost, final Map post, final boolean usegzip, final boolean concurrent) throws IOException { final HttpPost httpPost = new HttpPost(url.toNormalform(true)); setHost(vhost); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service @@ -523,7 +528,7 @@ public class HTTPClient { httpPost.setEntity(multipartEntity); } - return getContentBytes(httpPost, Integer.MAX_VALUE); + return getContentBytes(httpPost, Integer.MAX_VALUE, concurrent); } /** @@ -535,7 +540,7 @@ public class HTTPClient { * @return content bytes * @throws IOException */ - public byte[] POSTbytes(final String uri, final InputStream instream, final long length) throws 
IOException { + public byte[] POSTbytes(final String uri, final InputStream instream, final long length, final boolean concurrent) throws IOException { final MultiProtocolURL url = new MultiProtocolURL(uri); final HttpPost httpPost = new HttpPost(url.toNormalform(true)); String host = url.getHost(); @@ -546,7 +551,7 @@ public class HTTPClient { // statistics this.upbytes = length; httpPost.setEntity(inputStreamEntity); - return getContentBytes(httpPost, Integer.MAX_VALUE); + return getContentBytes(httpPost, Integer.MAX_VALUE, concurrent); } /** @@ -638,10 +643,10 @@ public class HTTPClient { } } - private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final int maxBytes) throws IOException { + private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final int maxBytes, final boolean concurrent) throws IOException { byte[] content = null; try { - execute(httpUriRequest); + execute(httpUriRequest, concurrent); if (this.httpResponse == null) return null; // get the response body final HttpEntity httpEntity = this.httpResponse.getEntity(); @@ -662,7 +667,7 @@ public class HTTPClient { return content; } - private void execute(final HttpUriRequest httpUriRequest) throws IOException { + private void execute(final HttpUriRequest httpUriRequest, final boolean concurrent) throws IOException { final HttpClientContext context = HttpClientContext.create(); context.setRequestConfig(reqConfBuilder.build()); if (this.host != null) @@ -685,9 +690,28 @@ public class HTTPClient { final long time = System.currentTimeMillis(); try { final CloseableHttpClient client = clientBuilder.build(); - this.httpResponse = client.execute(httpUriRequest, context); + if (concurrent) { + final CloseableHttpResponse[] thr = new CloseableHttpResponse[]{null}; + final Throwable[] te = new Throwable[]{null}; + Thread t = new Thread() { + public void run() { + try { + thr[0] = client.execute(httpUriRequest, context); + } catch (Throwable e) { + te[0] = e; + } + } + }; + t.start(); 
+ try {t.join(this.timeout);} catch (InterruptedException e) {} + if (te[0] != null) throw te[0]; + if (thr[0] == null) throw new IOException("timout to client after " + this.timeout + "ms"); + this.httpResponse = thr[0]; + } else { + this.httpResponse = client.execute(httpUriRequest, context); + } this.httpResponse.setHeader(HeaderFramework.RESPONSE_TIME_MILLIS, Long.toString(System.currentTimeMillis() - time)); - } catch (final IOException e) { + } catch (final Throwable e) { ConnectionInfo.removeConnection(httpUriRequest.hashCode()); httpUriRequest.abort(); if (this.httpResponse != null) this.httpResponse.close(); @@ -822,7 +846,7 @@ public class HTTPClient { url = "http://" + url; } try { - System.out.println(UTF8.String(client.GETbytes(url, null, null))); + System.out.println(UTF8.String(client.GETbytes(url, null, null, true))); } catch (final IOException e) { e.printStackTrace(); } diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index 9e541ca65..8b366c809 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -128,7 +128,7 @@ public final class HTTPLoader { client.setHeader(requestHeader.entrySet()); // send request - final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize); + final byte[] responseBody = client.GETbytes(url, sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), maxFileSize, false); final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); String requestURLString = request.url().toNormalform(true); @@ -243,7 +243,7 @@ public final class HTTPLoader { final HTTPClient client 
= new HTTPClient(agent); client.setTimout(20000); client.setHeader(requestHeader.entrySet()); - final byte[] responseBody = client.GETbytes(request.url(), null, null); + final byte[] responseBody = client.GETbytes(request.url(), null, null, false); final int code = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader header = new ResponseHeader(code, client.getHttpResponse().getAllHeaders()); // FIXME: 30*-handling (bottom) is never reached diff --git a/source/net/yacy/data/WorkTables.java b/source/net/yacy/data/WorkTables.java index 05d75e8cb..d0ece8557 100644 --- a/source/net/yacy/data/WorkTables.java +++ b/source/net/yacy/data/WorkTables.java @@ -238,7 +238,7 @@ public class WorkTables extends Tables { url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK()); ConcurrentLog.info("WorkTables", "executing url: " + url); try { - client.GETbytes(url, username, pass); + client.GETbytes(url, username, pass, false); l.put(url, client.getStatusCode()); } catch (final IOException e) { ConcurrentLog.logException(e); @@ -255,7 +255,7 @@ public class WorkTables extends Tables { String url = "http://" + host + ":" + port + path; if (pk != null) url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(pk); try { - client.GETbytes(url, username, pass); + client.GETbytes(url, username, pass, false); return client.getStatusCode(); } catch (final IOException e) { ConcurrentLog.logException(e); diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java index 531785ece..a2616a6a6 100644 --- a/source/net/yacy/document/parser/sitemapParser.java +++ b/source/net/yacy/document/parser/sitemapParser.java @@ -116,7 +116,7 @@ public class sitemapParser extends AbstractParser implements Parser { final HTTPClient client = new HTTPClient(agent); client.setHeader(requestHeader.entrySet()); try { - client.GET(sitemapURL.toString()); + client.GET(sitemapURL.toString(), false); 
if (client.getStatusCode() != 200) { throw new IOException("Unable to download the sitemap file " + sitemapURL + "\nServer returned status: " + client.getHttpResponse().getStatusLine()); diff --git a/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java b/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java index d956267ea..1234d7206 100644 --- a/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java +++ b/source/net/yacy/document/parser/xml/opensearchdescriptionReader.java @@ -149,7 +149,7 @@ public class opensearchdescriptionReader extends DefaultHandler { this.agent = agent; try { HTTPClient www = new HTTPClient(agent); - www.GET(path); + www.GET(path, false); final SAXParser saxParser = getParser(); saxParser.parse(www.getContentstream(), this); www.finish(); @@ -167,7 +167,7 @@ public class opensearchdescriptionReader extends DefaultHandler { this.atomurl = null; try { HTTPClient www = new HTTPClient(this.agent); - www.GET(path); + www.GET(path, false); final SAXParser saxParser = getParser(); try { saxParser.parse(www.getContentstream(), this); diff --git a/source/net/yacy/http/ProxyHandler.java b/source/net/yacy/http/ProxyHandler.java index 3830b2360..1013a5d0d 100644 --- a/source/net/yacy/http/ProxyHandler.java +++ b/source/net/yacy/http/ProxyHandler.java @@ -45,7 +45,6 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.http.HTTPClient; -import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.TextParser; import net.yacy.crawler.data.Cache; import net.yacy.crawler.retrieval.Response; @@ -110,11 +109,11 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler { String queryString = request.getQueryString() != null ? "?" 
+ request.getQueryString() : ""; DigestURL digestURI = new DigestURL(request.getScheme(), request.getServerName(), request.getServerPort(), request.getPathInfo() + queryString); if (request.getMethod().equals(HeaderFramework.METHOD_GET)) { - client.GET(digestURI); + client.GET(digestURI, false); } else if (request.getMethod().equals(HeaderFramework.METHOD_POST)) { - client.POST(digestURI, request.getInputStream(), request.getContentLength()); + client.POST(digestURI, request.getInputStream(), request.getContentLength(), false); } else if (request.getMethod().equals(HeaderFramework.METHOD_HEAD)) { - client.HEADResponse(digestURI); + client.HEADResponse(digestURI, false); } else { throw new ServletException("Unsupported Request Method"); } diff --git a/source/net/yacy/interaction/Interaction.java b/source/net/yacy/interaction/Interaction.java index 1bf14f1b6..17e1684fe 100644 --- a/source/net/yacy/interaction/Interaction.java +++ b/source/net/yacy/interaction/Interaction.java @@ -191,7 +191,7 @@ public static String Tableentry(String url, String type, String comment, String "http://"+host.getPublicAddress()+"/interaction/Contribution.json" + "?url=" + url + "&comment=" + comment + "&from=" + from + "&peer=" + peer, - new HashMap(), false))); + new HashMap(), false, false))); } catch (final IOException e) { // TODO Auto-generated catch block e.printStackTrace(); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 356d8456a..8e92c89fa 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -158,7 +158,7 @@ public final class Protocol { new MultiProtocolURL("http://" + targetAddress + "/yacy/" + filename), Seed.b64Hash2hexHash(targetPeerHash) + ".yacyh", parts, - false); + false, true); } /** @@ -201,7 +201,7 @@ public final class Protocol { new MultiProtocolURL("http://" + address + "/yacy/hello.html"), Seed.b64Hash2hexHash(otherHash) + ".yacyh", parts, - false); + false, true); responseTime 
= System.currentTimeMillis() - start; result = FileUtils.table(content); } catch (final Exception e ) { @@ -520,7 +520,7 @@ public final class Protocol { final byte[] result = httpClient.POSTbytes(new MultiProtocolURL("http://" + target.getClusterAddress() - + "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false); + + "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false, true); final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); if ( reader == null ) { Network.log.warn("yacyClient.queryRemoteCrawlURLs failed asking peer '" @@ -938,7 +938,7 @@ public final class Protocol { } final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, 8000); - byte[] a = httpClient.POSTbytes(new MultiProtocolURL("http://" + hostaddress + "/yacy/search.html"), hostname, parts, false); + byte[] a = httpClient.POSTbytes(new MultiProtocolURL("http://" + hostaddress + "/yacy/search.html"), hostname, parts, false, true); if (a != null && a.length > 200000) { // there is something wrong. This is too large, maybe a hack on the other side? 
a = null; @@ -1326,7 +1326,7 @@ public final class Protocol { new MultiProtocolURL("http://" + address + "/yacy/crawlReceipt.html"), target.getHexHash() + ".yacyh", parts, - false); + false, true); return FileUtils.table(content); } catch (final Exception e ) { // most probably a network time-out exception @@ -1504,7 +1504,7 @@ public final class Protocol { new MultiProtocolURL("http://" + address + "/yacy/transferRWI.html"), targetSeed.getHexHash() + ".yacyh", parts, - gzipBody); + gzipBody, true); final Iterator v = FileUtils.strings(content); // this should return a list of urlhashes that are unknown @@ -1562,7 +1562,7 @@ public final class Protocol { new MultiProtocolURL("http://" + address + "/yacy/transferURL.html"), targetSeed.getHexHash() + ".yacyh", parts, - gzipBody); + gzipBody, true); final Iterator v = FileUtils.strings(content); final Map result = FileUtils.table(v); @@ -1595,7 +1595,7 @@ public final class Protocol { new MultiProtocolURL("http://" + address + "/yacy/profile.html"), targetSeed.getHexHash() + ".yacyh", parts, - false); + false, true); return FileUtils.table(content); } catch (final Exception e ) { Network.log.warn("yacyClient.getProfile error:" + e.getMessage()); diff --git a/source/net/yacy/peers/SeedDB.java b/source/net/yacy/peers/SeedDB.java index 47afa59a9..a7554ed1a 100644 --- a/source/net/yacy/peers/SeedDB.java +++ b/source/net/yacy/peers/SeedDB.java @@ -849,7 +849,7 @@ public final class SeedDB implements AlternativeDomainNames { byte[] content = null; try { // send request - content = client.GETbytes(seedURL, null, null); + content = client.GETbytes(seedURL, null, null, false); } catch (final Exception e) { throw new IOException("Unable to download seed file '" + seedURL + "'. 
" + e.getMessage()); } diff --git a/source/net/yacy/peers/operation/yacyRelease.java b/source/net/yacy/peers/operation/yacyRelease.java index dc5aedf32..2714e1a9a 100644 --- a/source/net/yacy/peers/operation/yacyRelease.java +++ b/source/net/yacy/peers/operation/yacyRelease.java @@ -294,14 +294,14 @@ public final class yacyRelease extends yacyVersion { // download signature first, if public key is available try { if (this.publicKey != null) { - final byte[] signatureData = client.GETbytes(getUrl().toString() + ".sig", null, null); + final byte[] signatureData = client.GETbytes(getUrl().toString() + ".sig", null, null, false); if (signatureData == null) { ConcurrentLog.warn("yacyVersion", "download of signature " + getUrl().toString() + " failed. ignoring signature file."); } else signatureBytes = Base64Order.standardCoder.decode(UTF8.String(signatureData).trim()); } client.setTimout(120000); - client.GET(getUrl().toString()); + client.GET(getUrl().toString(), false); int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 53947d450..c997c3341 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -3746,7 +3746,7 @@ public final class Switchboard extends serverSwitch { final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent, timeout); client.setHeader(reqHeader.entrySet()); - client.HEADResponse(url.toString()); + client.HEADResponse(url.toString(), false); int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); ResponseHeader header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); if (checkAge) { @@ -3765,7 +3765,7 @@ public final class Switchboard extends serverSwitch { } } scc.incrementAndGet(); - final byte[] content = 
client.GETbytes(url, null, null); + final byte[] content = client.GETbytes(url, null, null, false); Iterator enu = FileUtils.strings(content); int lc = 0; while ( enu.hasNext() ) { diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java index e2c86ca42..344412393 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -486,7 +486,7 @@ public final class HTTPDProxyHandler { // send request try { - client.GET(getUrl); + client.GET(getUrl, false); if (log.isFinest()) log.finest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader); diff --git a/source/net/yacy/server/serverSwitch.java b/source/net/yacy/server/serverSwitch.java index 2ed89ecce..d682782e1 100644 --- a/source/net/yacy/server/serverSwitch.java +++ b/source/net/yacy/server/serverSwitch.java @@ -537,7 +537,7 @@ public class serverSwitch reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.yacyInternetCrawlerAgent.userAgent); final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); client.setHeader(reqHeader.entrySet()); - byte[] data = client.GETbytes(uri, getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"),getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "")); + byte[] data = client.GETbytes(uri, getConfig(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME, "admin"), getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), false); if ( data == null || data.length == 0 ) { continue; } diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 5431631dc..6d7dc12d3 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -227,9 +227,6 @@ public final class yacy { sb.setConfig("dataRoot", dataHome.toString()); yacyVersion.latestRelease = version; - // read environment - final int timeout = 
Math.max(5000, Integer.parseInt(sb.getConfig("httpdTimeout", "5000"))); - // create some directories final File htRootPath = new File(appHome, sb.getConfig("htRootPath", "htroot")); final File htDocsPath = sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT); @@ -523,7 +520,7 @@ public final class yacy { final HTTPClient con = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); con.setHeader(requestHeader.entrySet()); try { - con.GETbytes("http://localhost:"+ port +"/" + path, config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,"admin"), encodedPassword); + con.GETbytes("http://localhost:"+ port +"/" + path, config.getProperty(SwitchboardConstants.ADMIN_ACCOUNT_USER_NAME,"admin"), encodedPassword, false); if (con.getStatusCode() > 199 && con.getStatusCode() < 300) { ConcurrentLog.config("COMMAND-STEERING", "YACY accepted steering command: " + processdescription);