From 661867923a3e111ec9b0d39fbb457523d4e5437a Mon Sep 17 00:00:00 2001 From: sixcooler Date: Sun, 22 Aug 2010 17:38:27 +0000 Subject: [PATCH] ... migrating to HttpComponents-Client-4.x ... The Client is dead, long live the Client! (no references to the old client) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7060 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/CrawlQueues.java | 7 ++- source/de/anomic/data/SitemapParser.java | 59 +++++++++++------- source/de/anomic/http/client/Client.java | 1 + source/de/anomic/search/Switchboard.java | 4 +- source/de/anomic/yacy/yacyClient.java | 17 ++++-- source/net/yacy/cora/protocol/Client.java | 61 +++++++++++++++++-- .../yacy/cora/protocol/ConnectionInfo.java | 2 +- source/net/yacy/yacy.java | 61 +++++++++++-------- 8 files changed, 148 insertions(+), 64 deletions(-) diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java index 6270beec5..2199ac4e6 100644 --- a/source/de/anomic/crawler/CrawlQueues.java +++ b/source/de/anomic/crawler/CrawlQueues.java @@ -37,6 +37,7 @@ import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.Hit; import net.yacy.cora.document.RSSFeed; +import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; @@ -46,7 +47,7 @@ import net.yacy.kelondro.workflow.WorkflowJob; import de.anomic.crawler.retrieval.HTTPLoader; import de.anomic.crawler.retrieval.Request; import de.anomic.crawler.retrieval.Response; -import de.anomic.http.client.Client; +//import de.anomic.http.client.Client; import de.anomic.search.Switchboard; import de.anomic.search.SwitchboardConstants; import de.anomic.yacy.yacyClient; @@ -353,7 +354,7 @@ public class CrawlQueues { } // check again if (this.workers.size() >= sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10)) { - if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + Client.connectionCount()); + if (this.log.isFine()) log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount()); return false; } @@ -593,7 +594,7 @@ public class CrawlQueues { 1, e.getMessage() + " - in worker"); Log.logException(e); - Client.initConnectionManager(); +// Client.initConnectionManager(); this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED); } finally { crawlWorker w = workers.remove(code); diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index 01877bbdb..39045c742 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -34,6 +34,7 @@ import java.util.zip.GZIPInputStream; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import net.yacy.cora.protocol.Client; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.io.ByteCountInputStream; @@ -47,10 +48,11 @@ import org.xml.sax.helpers.DefaultHandler; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.retrieval.HTTPLoader; import de.anomic.crawler.retrieval.Request; -import de.anomic.http.client.Client; +//import de.anomic.http.client.Client; import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; -import de.anomic.http.server.ResponseContainer; +//import de.anomic.http.server.ResponseContainer; +import de.anomic.http.server.ResponseHeader; import de.anomic.search.Segments; import de.anomic.search.Switchboard; @@ -156,22 +158,36 @@ public class SitemapParser extends DefaultHandler { // download document final RequestHeader requestHeader = new RequestHeader(); requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent); - final Client client = new Client(5000, requestHeader); - ResponseContainer res = null; +// final Client client = new Client(5000, requestHeader); +// ResponseContainer res = null; + final Client client = new Client(); + client.setTimout(5000); + client.setHeader(requestHeader.entrySet()); try { - res = client.GET(siteMapURL.toString()); - if (res.getStatusCode() != 200) { - logger.logWarning("Unable to download the sitemap file " + this.siteMapURL + - "\nServer returned status: " + res.getStatusLine()); - return; - } +// res = client.GET(siteMapURL.toString()); +// if (res.getStatusCode() != 200) { +// logger.logWarning("Unable to download the sitemap file " + this.siteMapURL + +// "\nServer returned status: " + res.getStatusLine()); +// return; +// } + try { + client.GET(siteMapURL.toString()); + if (client.getStatusCode() != 200) { + logger.logWarning("Unable to download the sitemap file " + this.siteMapURL + + "\nServer returned status: " + client.getHttpResponse().getStatusLine()); + return; + } // getting some metadata - final String contentMimeType = res.getResponseHeader().mime(); - this.contentLength = res.getResponseHeader().getContentLength(); - - try { - InputStream contentStream = res.getDataAsStream(); +// final String contentMimeType = res.getResponseHeader().mime(); +// this.contentLength = res.getResponseHeader().getContentLength(); + final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders()); + final String contentMimeType = header.mime(); + this.contentLength = header.getContentLength(); + +// try { +// InputStream contentStream = res.getDataAsStream(); + InputStream contentStream = client.getContentstream(); if ((contentMimeType != null) && (contentMimeType.equals("application/x-gzip") || contentMimeType.equals("application/gzip"))) { if (logger.isFine()) logger.logFine("Sitemap file has mimetype " + contentMimeType); @@ -186,15 +202,16 @@ public class SitemapParser extends DefaultHandler { saxParser.parse(counterStream, this); streamCounter += counterStream.getCount(); } finally { - res.closeStream(); +// res.closeStream(); + client.finish(); } } catch (final Exception e) { logger.logWarning("Unable to parse sitemap file " + this.siteMapURL, e); - } finally { - if (res != null) { - // release connection - res.closeStream(); - } +// } finally { +// if (res != null) { +// // release connection +// res.closeStream(); +// } } } diff --git a/source/de/anomic/http/client/Client.java b/source/de/anomic/http/client/Client.java index b58e956db..f52182dca 100644 --- a/source/de/anomic/http/client/Client.java +++ b/source/de/anomic/http/client/Client.java @@ -67,6 +67,7 @@ import de.anomic.http.server.ResponseHeader; * HttpClient implementation which uses Jakarta Commons HttpClient 3.x {@link http://hc.apache.org/httpclient-3.x/} * * @author danielr + * @deprecated please use net.yacy.cora.protocol.Client instead * */ public class Client { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 8bdbe65ab..8f05a5798 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -1154,7 +1154,7 @@ public final class Switchboard extends serverSwitch { indexingStorageProcessor.awaitShutdown(12000); crawlStacker.close(); this.dbImportManager.close(); - de.anomic.http.client.Client.closeAllConnections(); +// de.anomic.http.client.Client.closeAllConnections(); wikiDB.close(); blogDB.close(); blogCommentDB.close(); @@ -1510,7 +1510,7 @@ public final class Switchboard extends serverSwitch { } // close unused connections - de.anomic.http.client.Client.cleanup(); +// de.anomic.http.client.Client.cleanup(); ConnectionInfo.cleanUp(); // do transmission of CR-files diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index a4c7204b6..064a83013 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -87,7 +87,7 @@ import de.anomic.crawler.ResultURLs; import de.anomic.crawler.retrieval.EventOrigin; import de.anomic.crawler.retrieval.HTTPLoader; //import de.anomic.http.client.DefaultCharsetStringPart; -import de.anomic.http.client.Client; +//import de.anomic.http.client.Client; import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; import de.anomic.search.RankingProfile; @@ -1153,8 +1153,17 @@ public final class yacyClient { final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent); - final byte[] content = Client.wget( - "http://" + target.getPublicAddress() + "/yacy/search.html" + +// final byte[] content = Client.wget( +// "http://" + target.getPublicAddress() + "/yacy/search.html" + +// "?myseed=" + sb.peers.mySeed().genSeedStr(null) + +// "&youare=" + target.hash + "&key=" + +// "&myseed=" + sb.peers.mySeed() .genSeedStr(null) + +// "&count=10" + +// "&resource=global" + +// "&query=" + new String(wordhashe) + +// "&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT), +// reqHeader, 10000, target.getHexHash() + ".yacyh"); + final byte[] content = HttpConnector.wget("http://" + target.getPublicAddress() + "/yacy/search.html" + "?myseed=" + sb.peers.mySeed().genSeedStr(null) + "&youare=" + target.hash + "&key=" + "&myseed=" + sb.peers.mySeed() .genSeedStr(null) + @@ -1162,7 +1171,7 @@ public final class yacyClient { "&resource=global" + "&query=" + new String(wordhashe) + "&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT), - reqHeader, 10000, target.getHexHash() + ".yacyh"); + reqHeader.entrySet(), 10000, target.getHexHash() + ".yacyh"); final Map result = FileUtils.table(content); System.out.println("Result=" + result.toString()); } catch (final Exception e) { diff --git a/source/net/yacy/cora/protocol/Client.java b/source/net/yacy/cora/protocol/Client.java index b64922e88..e8814e396 100644 --- a/source/net/yacy/cora/protocol/Client.java +++ b/source/net/yacy/cora/protocol/Client.java @@ -101,6 +101,10 @@ public class Client { } } + public static void setDefaultUserAgent(final String defaultAgent) { + HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent); + } + private static void initConnectionManager() { // Create and initialize HTTP parameters final HttpParams httpParams = new BasicHttpParams(); @@ -162,7 +166,9 @@ public class Client { } /** - * this should be called just before shutdown + * This method should be called just before shutdown + * to stop the ConnectionManager and idledConnectionEvictor + * * @throws InterruptedException */ public static void closeConnectionManager() throws InterruptedException { @@ -179,6 +185,7 @@ public class Client { } /** + * This method sets the Header used for the request * * @param entrys to be set as request header */ @@ -193,6 +200,7 @@ public class Client { } /** + * This method sets the timeout of the Connection and Socket * * @param timeout in milliseconds */ @@ -201,6 +209,8 @@ public class Client { } /** + * This method sets the UserAgent to be used for the request + * * @param userAgent */ public void setUserAgent(final String userAgent) { @@ -208,6 +218,8 @@ public class Client { } /** + * This method sets the host to be called at the request + * * @param host */ public void setHost(final String host) { @@ -215,6 +227,7 @@ public class Client { } /** + * This method sets if requests should follow redirects * * @param redirecting */ @@ -223,7 +236,8 @@ public class Client { } /** - * set the authorization realm + * This method sets the authorization realm for the request + * * @param realm */ public void setRealm(final String realm) { @@ -343,7 +357,36 @@ public class Client { return httpResponse.getStatusLine().getStatusCode(); } + /** + * This method gets direct access to the content-stream + * Since this way is uncontrolled by the Client think of using 'writeTo' instead! + * Please take care to call finish()! + * + * @return the content as InputStream + * @throws IOException + */ + public InputStream getContentstream() throws IOException { + if (httpResponse != null && currentRequest != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) try { + return httpEntity.getContent(); + } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + throw e; + } + } + return null; + } + /** + * This method streams the content to the outputStream + * Please take care to call finish()! + * + * @param outputStream + * @throws IOException + */ public void writeTo(final OutputStream outputStream) throws IOException { if (httpResponse != null && currentRequest != null) { final HttpEntity httpEntity = httpResponse.getEntity(); @@ -356,14 +399,20 @@ public class Client { ConnectionInfo.removeConnection(currentRequest.hashCode()); currentRequest = null; } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); currentRequest.abort(); - ConnectionInfo.removeConnection(currentRequest.hashCode()); currentRequest = null; throw e; } } } + /** + * This method ensures correct finish of client-connections + * This method should be used after every use of GET or POST and writeTo or getContentstream! + * + * @throws IOException + */ public void finish() throws IOException { if (httpResponse != null) { final HttpEntity httpEntity = httpResponse.getEntity(); @@ -374,8 +423,8 @@ public class Client { } } if (currentRequest != null) { - currentRequest.abort(); ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); currentRequest = null; } } @@ -396,8 +445,8 @@ public class Client { httpEntity.consumeContent(); } } catch (final IOException e) { - httpUriRequest.abort(); ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); throw e; } ConnectionInfo.removeConnection(httpUriRequest.hashCode()); @@ -415,8 +464,8 @@ public class Client { // execute the method httpResponse = httpClient.execute(httpUriRequest, httpContext); } catch (ClientProtocolException e) { - httpUriRequest.abort(); ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); throw new IOException("Client can't execute: " + e.getMessage()); } } diff --git a/source/net/yacy/cora/protocol/ConnectionInfo.java b/source/net/yacy/cora/protocol/ConnectionInfo.java index b84e1eab1..17f7562a6 100644 --- a/source/net/yacy/cora/protocol/ConnectionInfo.java +++ b/source/net/yacy/cora/protocol/ConnectionInfo.java @@ -145,7 +145,7 @@ public class ConnectionInfo { } /** - * removes stale connections + * @return how many bytes queued up */ public static long getActiveUpbytes() { long up = 0L; diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 3a7020acc..232bff3ea 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -24,11 +24,11 @@ package net.yacy; // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.io.BufferedInputStream; +//import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; +//import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -46,10 +46,11 @@ import java.util.concurrent.Semaphore; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import net.yacy.cora.protocol.Client; import net.yacy.gui.YaCyApp; import net.yacy.gui.framework.Browser; import net.yacy.kelondro.blob.MapDataMining; -import net.yacy.kelondro.data.meta.DigestURI; +//import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; @@ -65,13 +66,13 @@ import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.ScoreCluster; import net.yacy.kelondro.util.OS; -import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +//import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import de.anomic.data.translator; -import de.anomic.http.client.Client; +//import de.anomic.http.client.Client; import de.anomic.http.server.HTTPDemon; import de.anomic.http.server.RequestHeader; -import de.anomic.http.server.ResponseContainer; +//import de.anomic.http.server.ResponseContainer; import de.anomic.search.MetadataRepository; import de.anomic.search.Segment; import de.anomic.search.Switchboard; @@ -298,7 +299,8 @@ public final class yacy { // set user-agent final String userAgent = "yacy/" + Double.toString(version) + " (www.yacy.net; " + Client.getSystemOST() + ")"; - Client.setUserAgent(userAgent); +// Client.setUserAgent(userAgent); + Client.setDefaultUserAgent(userAgent); // start main threads final String port = sb.getConfig("port", "8080"); @@ -415,16 +417,16 @@ public final class yacy { Log.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)"); } */ - Client.closeAllConnections(); - MultiThreadedHttpConnectionManager.shutdownAll(); +// Client.closeAllConnections(); +// MultiThreadedHttpConnectionManager.shutdownAll(); // idle until the processes are down if (server.isAlive()) { //Thread.sleep(2000); // wait a while server.interrupt(); - MultiThreadedHttpConnectionManager.shutdownAll(); +// MultiThreadedHttpConnectionManager.shutdownAll(); } - MultiThreadedHttpConnectionManager.shutdownAll(); +// MultiThreadedHttpConnectionManager.shutdownAll(); Log.logConfig("SHUTDOWN", "server has terminated"); sb.close(); } catch (final Exception e) { @@ -558,32 +560,37 @@ public final class yacy { // send 'wget' to web interface final RequestHeader requestHeader = new RequestHeader(); requestHeader.put(RequestHeader.AUTHORIZATION, "realm=" + encodedPassword); // for http-authentify - final Client con = new Client(10000, requestHeader); - ResponseContainer res = null; +// final Client con = new Client(10000, requestHeader); + final Client con = new Client(); + con.setHeader(requestHeader.entrySet()); +// ResponseContainer res = null; try { - res = con.GET("http://localhost:"+ port +"/" + path); +// res = con.GET("http://localhost:"+ port +"/" + path); + con.GETbytes("http://localhost:"+ port +"/" + path); // read response - if (res.getStatusLine().startsWith("2")) { +// if (res.getStatusLine().startsWith("2")) { + if (con.getStatusCode() > 199 && con.getStatusCode() < 300) { Log.logConfig("COMMAND-STEERING", "YACY accepted steering command: " + processdescription); - final ByteArrayOutputStream bos = new ByteArrayOutputStream(); - try { - FileUtils.copyToStream(new BufferedInputStream(res.getDataAsStream()), new BufferedOutputStream(bos)); - } finally { - res.closeStream(); - } +// final ByteArrayOutputStream bos = new ByteArrayOutputStream(); //This is stream is not used??? +// try { +// FileUtils.copyToStream(new BufferedInputStream(res.getDataAsStream()), new BufferedOutputStream(bos)); +// } finally { +// res.closeStream(); +// } } else { - Log.logSevere("COMMAND-STEERING", "error response from YACY socket: " + res.getStatusLine()); +// Log.logSevere("COMMAND-STEERING", "error response from YACY socket: " + res.getStatusLine()); + Log.logSevere("COMMAND-STEERING", "error response from YACY socket: " + con.getHttpResponse().getStatusLine()); System.exit(-1); } } catch (final IOException e) { Log.logSevere("COMMAND-STEERING", "could not establish connection to YACY socket: " + e.getMessage()); System.exit(-1); - } finally { - // release connection - if(res != null) { - res.closeStream(); - } +// } finally { +// // release connection +// if(res != null) { +// res.closeStream(); +// } } // finished