From 90531f78ffcdb98ad7d5b7b49042e054af7452f7 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 22 Aug 2010 22:32:39 +0000 Subject: [PATCH] refactoring of the cora package to get subpackages for http and ftp (smb to come) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7063 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Network.java | 6 +- source/de/anomic/crawler/RobotsTxt.java | 4 +- .../anomic/crawler/retrieval/FTPLoader.java | 10 +- .../anomic/crawler/retrieval/FileLoader.java | 4 +- .../anomic/crawler/retrieval/HTTPLoader.java | 10 +- .../anomic/crawler/retrieval/SMBLoader.java | 4 +- source/de/anomic/data/SitemapParser.java | 4 +- source/de/anomic/data/WorkTables.java | 4 +- .../anomic/http/server/HTTPDProxyHandler.java | 16 +- .../de/anomic/search/MetadataRepository.java | 4 +- source/de/anomic/search/Switchboard.java | 10 +- source/de/anomic/tools/loaderThreads.java | 2 +- .../yacy/seedUpload/yacySeedUploadFtp.java | 5 +- source/de/anomic/yacy/yacyClient.java | 2 +- source/de/anomic/yacy/yacyRelease.java | 4 +- source/de/anomic/yacy/yacySeedDB.java | 4 +- .../yacy/cora/protocol/ConnectionInfo.java | 6 +- .../net/yacy/cora/protocol/HttpConnector.java | 7 +- .../yacy/cora/protocol/ftp/FTPClient.java} | 78 +-- .../http/GzipDecompressingEntity.java | 73 ++ .../protocol/http/GzipRequestInterceptor.java | 46 ++ .../http/GzipResponseInterceptor.java | 63 ++ .../yacy/cora/protocol/http/HTTPClient.java | 646 ++++++++++++++++++ .../cora/protocol/http/ProxySettings.java | 97 +++ source/net/yacy/yacy.java | 8 +- 25 files changed, 1020 insertions(+), 97 deletions(-) rename source/{de/anomic/net/ftpc.java => net/yacy/cora/protocol/ftp/FTPClient.java} (95%) create mode 100644 source/net/yacy/cora/protocol/http/GzipDecompressingEntity.java create mode 100644 source/net/yacy/cora/protocol/http/GzipRequestInterceptor.java create mode 100644 source/net/yacy/cora/protocol/http/GzipResponseInterceptor.java create mode 100644 
source/net/yacy/cora/protocol/http/HTTPClient.java create mode 100644 source/net/yacy/cora/protocol/http/ProxySettings.java diff --git a/htroot/Network.java b/htroot/Network.java index 9dbb61cdf..50032806a 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -36,7 +36,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.MapTools; @@ -149,7 +149,7 @@ public class Network { prop.put("table_my-url", seed.get(yacySeed.SEEDLIST, "")); // generating the location string - prop.putHTML("table_my-location", Client.generateLocation()); + prop.putHTML("table_my-location", HTTPClient.generateLocation()); } // overall results: Network statistics @@ -364,7 +364,7 @@ public class Network { userAgent = null; if (seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) { userAgent = HTTPLoader.yacyUserAgent; - location = Client.generateLocation(); + location = HTTPClient.generateLocation(); } else { userAgent = sb.peers.peerActions.getUserAgent(seed.getIP()); location = parseLocationInUserAgent(userAgent); diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index b37da92a3..dd11a07aa 100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -36,7 +36,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.MultiProtocolURI; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.blob.BEncodedHeap; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; @@ -347,7 +347,7 @@ public class RobotsTxt { //TODO: adding Traffic statistic for robots download? 
// final Client client = new Client(10000, reqHeaders); // ResponseContainer res = null; - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setHeader(reqHeaders.entrySet()); try { // check for interruption diff --git a/source/de/anomic/crawler/retrieval/FTPLoader.java b/source/de/anomic/crawler/retrieval/FTPLoader.java index 58a496d8a..507e45123 100644 --- a/source/de/anomic/crawler/retrieval/FTPLoader.java +++ b/source/de/anomic/crawler/retrieval/FTPLoader.java @@ -33,6 +33,7 @@ import java.io.PrintStream; import java.util.Date; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ftp.FTPClient; import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; @@ -42,7 +43,6 @@ import de.anomic.crawler.Latency; import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; import de.anomic.http.server.ResponseHeader; -import de.anomic.net.ftpc; import de.anomic.search.Segments; import de.anomic.search.Switchboard; @@ -95,7 +95,7 @@ public class FTPLoader { // create new ftp client final PrintStream err = new PrintStream(berr); - final ftpc ftpClient = new ftpc(System.in, null, err); + final FTPClient ftpClient = new FTPClient(System.in, null, err); ftpClient.setDataTimeoutByMaxFilesize(maxFileSize); // get a connection @@ -162,7 +162,7 @@ public class FTPLoader { /** * @param ftpClient */ - private void closeConnection(final ftpc ftpClient) { + private void closeConnection(final FTPClient ftpClient) { // closing connection ftpClient.exec("close", false); ftpClient.exec("exit", false); @@ -171,7 +171,7 @@ public class FTPLoader { /** * establish a connection to the ftp server (open, login, set transfer mode) */ - private boolean openConnection(final ftpc ftpClient, final DigestURI entryUrl) { + private boolean openConnection(final FTPClient ftpClient, final DigestURI entryUrl) { // get username and password final String 
userInfo = entryUrl.getUserInfo(); String userName = "anonymous", userPwd = "anonymous"; @@ -208,7 +208,7 @@ public class FTPLoader { return true; } - private Response getFile(final ftpc ftpClient, final Request request, boolean acceptOnlyParseable) throws Exception { + private Response getFile(final FTPClient ftpClient, final Request request, boolean acceptOnlyParseable) throws Exception { // determine the mimetype of the resource final DigestURI url = request.url(); final String mime = TextParser.mimeOf(url); diff --git a/source/de/anomic/crawler/retrieval/FileLoader.java b/source/de/anomic/crawler/retrieval/FileLoader.java index e818fd570..8abc4d9a0 100644 --- a/source/de/anomic/crawler/retrieval/FileLoader.java +++ b/source/de/anomic/crawler/retrieval/FileLoader.java @@ -29,11 +29,11 @@ import java.util.List; import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; import de.anomic.http.server.ResponseHeader; -import de.anomic.net.ftpc; import de.anomic.search.Segments; import de.anomic.search.Switchboard; import de.anomic.data.MimeTable; +import net.yacy.cora.protocol.ftp.FTPClient; import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; @@ -77,7 +77,7 @@ public class FileLoader { list.add(u + ((u.endsWith("/") || u.endsWith("\\")) ? 
"" : "/") + s); } - StringBuilder content = ftpc.dirhtml(u, null, null, null, list, true); + StringBuilder content = FTPClient.dirhtml(u, null, null, null, list, true); ResponseHeader responseHeader = new ResponseHeader(); responseHeader.put(HeaderFramework.LAST_MODIFIED, DateFormatter.formatRFC1123(new Date())); diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index 3d1100e49..8f816406c 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -28,7 +28,7 @@ import java.io.IOException; import java.util.Date; import net.yacy.cora.document.MultiProtocolURI; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.repository.Blacklist; @@ -49,8 +49,8 @@ public final class HTTPLoader { private static final String DEFAULT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7"; public static final long DEFAULT_MAXFILESIZE = 1024 * 1024 * 10; public static final int DEFAULT_CRAWLING_RETRY_COUNT = 5; - public static final String crawlerUserAgent = "yacybot (" + Client.getSystemOST() +") http://yacy.net/bot.html"; - public static final String yacyUserAgent = "yacy (" + Client.getSystemOST() +") yacy.net"; + public static final String crawlerUserAgent = "yacybot (" + HTTPClient.getSystemOST() +") http://yacy.net/bot.html"; + public static final String yacyUserAgent = "yacy (" + HTTPClient.getSystemOST() +") yacy.net"; /** * The socket timeout that should be used @@ -120,7 +120,7 @@ public final class HTTPLoader { // HTTP-Client // final Client client = new Client(socketTimeout, requestHeader); // ResponseContainer res = null; - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(socketTimeout); client.setHeader(requestHeader.entrySet()); // try { @@ -251,7 +251,7 @@ public final 
class HTTPLoader { // HTTP-Client // final Client client = new Client(20000, requestHeader); // ResponseContainer res = null; - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(20000); client.setHeader(requestHeader.entrySet()); // try { diff --git a/source/de/anomic/crawler/retrieval/SMBLoader.java b/source/de/anomic/crawler/retrieval/SMBLoader.java index b54f14dae..901f23142 100644 --- a/source/de/anomic/crawler/retrieval/SMBLoader.java +++ b/source/de/anomic/crawler/retrieval/SMBLoader.java @@ -42,11 +42,11 @@ import jcifs.smb.SmbFileInputStream; import de.anomic.http.server.HeaderFramework; import de.anomic.http.server.RequestHeader; import de.anomic.http.server.ResponseHeader; -import de.anomic.net.ftpc; import de.anomic.search.Segments; import de.anomic.search.Switchboard; import de.anomic.data.MimeTable; +import net.yacy.cora.protocol.ftp.FTPClient; import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; @@ -96,7 +96,7 @@ public class SMBLoader { list.add(u + s); } - StringBuilder content = ftpc.dirhtml(u, null, null, null, list, true); + StringBuilder content = FTPClient.dirhtml(u, null, null, null, list, true); ResponseHeader responseHeader = new ResponseHeader(); responseHeader.put(HeaderFramework.LAST_MODIFIED, DateFormatter.formatRFC1123(new Date())); diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index 39045c742..8e1f244f8 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -34,7 +34,7 @@ import java.util.zip.GZIPInputStream; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.io.ByteCountInputStream; @@ 
-160,7 +160,7 @@ public class SitemapParser extends DefaultHandler { requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent); // final Client client = new Client(5000, requestHeader); // ResponseContainer res = null; - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(5000); client.setHeader(requestHeader.entrySet()); try { diff --git a/source/de/anomic/data/WorkTables.java b/source/de/anomic/data/WorkTables.java index e1d8c0f74..95033dd96 100644 --- a/source/de/anomic/data/WorkTables.java +++ b/source/de/anomic/data/WorkTables.java @@ -34,7 +34,7 @@ import java.util.Date; import java.util.LinkedHashMap; import java.util.Map; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; @@ -187,7 +187,7 @@ public class WorkTables extends Tables { */ public Map execAPICall(Collection pks, String host, int port, String realm) { // now call the api URLs and store the result status - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setRealm(realm); client.setTimout(120000); LinkedHashMap l = new LinkedHashMap(); diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index 8d96f380c..cba903ff3 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -71,8 +71,8 @@ import java.util.logging.LogManager; import java.util.logging.Logger; import java.util.zip.GZIPOutputStream; -import net.yacy.cora.protocol.Client; -import net.yacy.cora.protocol.ProxySettings; +import net.yacy.cora.protocol.http.HTTPClient; +import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.document.TextParser; import net.yacy.document.parser.html.ContentTransformer; import 
net.yacy.document.parser.html.Transformer; @@ -474,7 +474,7 @@ public final class HTTPDProxyHandler { final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; - final Client client = setupHttpClient(requestHeader, connectHost); + final HTTPClient client = setupHttpClient(requestHeader, connectHost); // send request try { @@ -830,7 +830,7 @@ public final class HTTPDProxyHandler { final String getUrl = "http://"+ connectHost + remotePath; if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl); - final Client client = setupHttpClient(requestHeader, connectHost); + final HTTPClient client = setupHttpClient(requestHeader, connectHost); // send request // try { @@ -939,7 +939,7 @@ public final class HTTPDProxyHandler { final int contentLength = requestHeader.getContentLength(); requestHeader.remove(RequestHeader.CONTENT_LENGTH); - final Client client = setupHttpClient(requestHeader, connectHost); + final HTTPClient client = setupHttpClient(requestHeader, connectHost); // check input if(body == null) { @@ -1121,11 +1121,11 @@ public final class HTTPDProxyHandler { * @param connectHost may be 'host:port' or 'host:port/path' * @return */ - private static Client setupHttpClient(final RequestHeader requestHeader, final String connectHost) { + private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) { // setup HTTP-client // final Client client = new Client(timeout, requestHeader); // client.setFollowRedirects(false); - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(timeout); client.setHeader(requestHeader.entrySet()); client.setRedirecting(false); @@ -1300,7 +1300,7 @@ public final class HTTPDProxyHandler { if (ProxySettings.use && ProxySettings.use4ssl) { // final Client remoteProxy = new Client(timeout, requestHeader); // remoteProxy.setFollowRedirects(false); // should not be needed, but safe is safe - 
final Client remoteProxy = setupHttpClient(requestHeader, host); + final HTTPClient remoteProxy = setupHttpClient(requestHeader, host); // ResponseContainer response = null; try { diff --git a/source/de/anomic/search/MetadataRepository.java b/source/de/anomic/search/MetadataRepository.java index 732530d9e..e9167834a 100644 --- a/source/de/anomic/search/MetadataRepository.java +++ b/source/de/anomic/search/MetadataRepository.java @@ -39,7 +39,7 @@ import java.util.Map; import java.util.TreeSet; import net.yacy.cora.document.MultiProtocolURI; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -269,7 +269,7 @@ public final class MetadataRepository implements Iterable { } log.logInfo("URLs vorher: " + urlIndexFile.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size()); - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); final Iterator eiter2 = damagedURLS.iterator(); byte[] urlHashBytes; while (eiter2.hasNext()) { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 8f05a5798..fb6e98758 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -72,9 +72,9 @@ import java.util.zip.ZipInputStream; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.RSSMessage; -import net.yacy.cora.protocol.Client; import net.yacy.cora.protocol.ConnectionInfo; -import net.yacy.cora.protocol.ProxySettings; +import net.yacy.cora.protocol.http.HTTPClient; +import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -1173,7 +1173,7 @@ public final class Switchboard extends serverSwitch { 
UPnP.deletePortMapping(); Tray.removeTray(); try { - Client.closeConnectionManager(); + HTTPClient.closeConnectionManager(); } catch (InterruptedException e) { Log.logException(e); } @@ -2359,7 +2359,7 @@ public final class Switchboard extends serverSwitch { reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000)); @@ -2510,7 +2510,7 @@ public final class Switchboard extends serverSwitch { public static Map loadFileAsMap(final DigestURI url) { final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent); - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); try { // sending request diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index 4bc154c62..9d92c50b5 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -24,7 +24,7 @@ package de.anomic.tools; import java.util.Hashtable; import net.yacy.cora.protocol.HttpConnector; -import net.yacy.cora.protocol.ProxySettings; +import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.kelondro.data.meta.DigestURI; import de.anomic.crawler.retrieval.HTTPLoader; diff --git a/source/de/anomic/yacy/seedUpload/yacySeedUploadFtp.java b/source/de/anomic/yacy/seedUpload/yacySeedUploadFtp.java index cff9633bc..78b51473d 100644 --- a/source/de/anomic/yacy/seedUpload/yacySeedUploadFtp.java +++ b/source/de/anomic/yacy/seedUpload/yacySeedUploadFtp.java @@ -27,7 +27,8 @@ package de.anomic.yacy.seedUpload; import java.io.File; -import de.anomic.net.ftpc; +import 
net.yacy.cora.protocol.ftp.FTPClient; + import de.anomic.server.serverSwitch; import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedUploader; @@ -53,7 +54,7 @@ public class yacySeedUploadFtp implements yacySeedUploader { final File seedFTPPath = new File(sb.getConfig(CONFIG_FTP_PATH,null)); if ((seedFTPServer != null) && (seedFTPAccount != null) && (seedFTPPassword != null) && (seedFTPPath != null)) { - final String log = ftpc.put(seedFTPServer, seedFile, seedFTPPath.getParent(), seedFTPPath.getName(), seedFTPAccount, seedFTPPassword); + final String log = FTPClient.put(seedFTPServer, seedFile, seedFTPPath.getParent(), seedFTPPath.getName(), seedFTPAccount, seedFTPPassword); return log; } throw new Exception ("Seed upload settings not configured properly. password-len=" + diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 064a83013..290b4033a 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -1223,7 +1223,7 @@ public final class yacyClient { } } try { - net.yacy.cora.protocol.Client.closeConnectionManager(); + net.yacy.cora.protocol.http.HTTPClient.closeConnectionManager(); } catch (InterruptedException e) { Log.logException(e); } diff --git a/source/de/anomic/yacy/yacyRelease.java b/source/de/anomic/yacy/yacyRelease.java index ea6ec2119..b2776b860 100644 --- a/source/de/anomic/yacy/yacyRelease.java +++ b/source/de/anomic/yacy/yacyRelease.java @@ -46,7 +46,7 @@ import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.MultiProtocolURI; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.document.parser.html.ContentScraper; import net.yacy.kelondro.io.CharBuffer; import net.yacy.kelondro.logging.Log; @@ -291,7 +291,7 @@ public final class yacyRelease extends yacyVersion { final String name = this.getUrl().getFileName(); byte[] signatureBytes = null; - final Client 
client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(6000); client.setHeader(reqHeader.entrySet()); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 703ad7fbb..ca4e102fa 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -39,7 +39,7 @@ import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.blob.MapDataMining; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.Word; @@ -875,7 +875,7 @@ public final class yacySeedDB implements AlternativeDomainNames { // res.closeStream(); // } // } - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); byte[] content = null; try { diff --git a/source/net/yacy/cora/protocol/ConnectionInfo.java b/source/net/yacy/cora/protocol/ConnectionInfo.java index 17f7562a6..d2912c12a 100644 --- a/source/net/yacy/cora/protocol/ConnectionInfo.java +++ b/source/net/yacy/cora/protocol/ConnectionInfo.java @@ -176,7 +176,7 @@ public class ConnectionInfo { * * @param max connections */ - protected static void setMaxcount(final int max) { + public static void setMaxcount(final int max) { if (max > 0) maxcount = max; } @@ -185,7 +185,7 @@ public class ConnectionInfo { * * @param conInfo */ - protected static void addConnection(final ConnectionInfo conInfo) { + public static void addConnection(final ConnectionInfo conInfo) { allConnections.add(conInfo); } @@ -203,7 +203,7 @@ public class ConnectionInfo { * * @param id */ - protected static void removeConnection(final int id) { + public static void removeConnection(final int id) { removeConnection(new ConnectionInfo(null, null, null, id, 0, 0)); } diff --git a/source/net/yacy/cora/protocol/HttpConnector.java 
b/source/net/yacy/cora/protocol/HttpConnector.java index fc35ba582..eac0b9555 100644 --- a/source/net/yacy/cora/protocol/HttpConnector.java +++ b/source/net/yacy/cora/protocol/HttpConnector.java @@ -28,6 +28,7 @@ import java.util.Map.Entry; //import java.util.List; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.http.HTTPClient; //import org.apache.commons.httpclient.methods.multipart.Part; import org.apache.http.entity.mime.content.ContentBody; @@ -97,7 +98,7 @@ public class HttpConnector { * @throws IOException */ public static byte[] wput(final String url, final String vhost, LinkedHashMap post, final int timeout) throws IOException { - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(timeout); client.setUserAgent(HTTPLoader.yacyUserAgent); client.setHost(vhost); @@ -128,7 +129,7 @@ public class HttpConnector { * @throws IOException */ public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException { - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setTimout(timeout); client.setUserAgent(HTTPLoader.yacyUserAgent); client.setHost(vhost); @@ -160,7 +161,7 @@ public class HttpConnector { * @throws IOException */ public static byte[] wget(final String url, final Set> entrys, final int timeout, final String vhost) throws IOException { - final Client client = new Client(); + final HTTPClient client = new HTTPClient(); client.setHeader(entrys); client.setTimout(timeout); client.setHost(vhost); diff --git a/source/de/anomic/net/ftpc.java b/source/net/yacy/cora/protocol/ftp/FTPClient.java similarity index 95% rename from source/de/anomic/net/ftpc.java rename to source/net/yacy/cora/protocol/ftp/FTPClient.java index 7b1330692..b63af2eb9 100644 --- a/source/de/anomic/net/ftpc.java +++ b/source/net/yacy/cora/protocol/ftp/FTPClient.java @@ -1,34 +1,30 @@ -// ftpc.java -// (C) by Michael Peter Christen; mc@yacy.net 
-// first published on http://www.anomic.de -// Frankfurt, Germany, 2002, 2004, 2006 -// main implementation finished: 28.05.2002 -// last major change: 06.05.2004 -// added html generation for directories: 5.9.2006 -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.net; +/** + * FTPClient + * Copyright 2002, 2004, 2006, 2010 by Michael Peter Christen + * first published on http://yacy.net + * main implementation finished: 28.05.2002 + * last major change: 06.05.2004 + * added html generation for directories: 5.9.2006 + * migrated to the cora package and re-licensed under lgpl: 23.08.2010 + * + * This file is part of YaCy Content Integration + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.cora.protocol.ftp; import java.io.BufferedOutputStream; import java.io.BufferedReader; @@ -69,7 +65,7 @@ import java.util.regex.Pattern; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.Domains; -public class ftpc { +public class FTPClient { private static final String vDATE = "20040506"; private static final String logPrefix = "FTPC: "; @@ -126,11 +122,11 @@ public class ftpc { // TODO: implement RFC 2640 Internationalization - public ftpc() { + public FTPClient() { this(System.in, System.out, System.err); } - public ftpc(final java.io.InputStream ins, final java.io.PrintStream outs, final java.io.PrintStream errs) { + public FTPClient(final java.io.InputStream ins, final java.io.PrintStream outs, final java.io.PrintStream errs) { // try { // System.setSecurityManager(new sm()); @@ -2561,7 +2557,7 @@ public class ftpc { public static List dir(final String host, final String remotePath, final String account, final String password, final boolean extended) { try { - final ftpc c = new ftpc(); + final FTPClient c = new FTPClient(); c.cmd = new String[] { "open", host }; c.OPEN(); c.cmd = new String[] { "user", account, password }; @@ -2582,7 +2578,7 @@ public class ftpc { public static void dir(final String host, final String remotePath, final String account, final String password) { try { - final ftpc c = new ftpc(); + final FTPClient c = new FTPClient(); c.exec("open " + host, false); c.exec("user " + account + " " + password, false); c.exec("cd " + remotePath, false); @@ -2620,7 +2616,7 @@ public class ftpc { final 
String account, final String password) { // opens a new connection and returns a directory listing as html try { - final ftpc c = new ftpc(System.in, null, System.err); + final FTPClient c = new FTPClient(System.in, null, System.err); c.open(host, port); c.login(account, password); c.sys(); @@ -2720,7 +2716,7 @@ public class ftpc { final ByteArrayOutputStream berr = new ByteArrayOutputStream(); final PrintStream err = new PrintStream(berr); - final ftpc c = new ftpc(System.in, out, err); + final FTPClient c = new FTPClient(System.in, out, err); c.exec("open " + host, false); c.exec("user " + account + " " + password, false); if (remotePath != null) { @@ -2758,7 +2754,7 @@ public class ftpc { public static void get(final String host, String remoteFile, final File localPath, final String account, final String password) { try { - final ftpc c = new ftpc(); + final FTPClient c = new FTPClient(); if (remoteFile.length() == 0) { remoteFile = "/"; } @@ -2836,12 +2832,12 @@ public class ftpc { System.out.println(logPrefix + "try -h for command line options"); System.out.println(logPrefix); if (args.length == 0) { - (new ftpc()).shell(null); + (new FTPClient()).shell(null); } else if (args.length == 1) { if (args[0].equals("-h")) { printHelp(); } else { - (new ftpc()).shell(args[0]); + (new FTPClient()).shell(args[0]); } } else if (args.length == 2) { printHelp(); diff --git a/source/net/yacy/cora/protocol/http/GzipDecompressingEntity.java b/source/net/yacy/cora/protocol/http/GzipDecompressingEntity.java new file mode 100644 index 000000000..e14282391 --- /dev/null +++ b/source/net/yacy/cora/protocol/http/GzipDecompressingEntity.java @@ -0,0 +1,73 @@ +/** + * GzipDecompressingEntity + * Copyright 2010 by Sebastian Gaebel + * First released 01.07.2010 at http://yacy.net + * + * $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $ + * $LastChangedRevision: 7011 $ + * $LastChangedBy: sixcooler $ + * + * This library is free software; you can redistribute it and/or + 
* modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + + +package net.yacy.cora.protocol.http; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.GZIPInputStream; + +import org.apache.http.HttpEntity; +import org.apache.http.entity.HttpEntityWrapper; + +public class GzipDecompressingEntity extends HttpEntityWrapper { + + private static final int DEFAULT_BUFFER_SIZE = 1024; // this is also the maximum chunk size + + public GzipDecompressingEntity(final HttpEntity entity) { + super(entity); + } + + public InputStream getContent() throws IOException, IllegalStateException { + + // the wrapped entity's getContent() decides about repeatability + InputStream wrappedin = wrappedEntity.getContent(); + + return new GZIPInputStream(wrappedin); + } + + public void writeTo(OutputStream outstream) throws IOException { + if (outstream == null) { + throw new IllegalArgumentException("Output stream may not be null"); + } + InputStream instream = this.getContent(); + int l; + byte[] tmp = new byte[DEFAULT_BUFFER_SIZE]; + while ((l = instream.read(tmp)) != -1) { + outstream.write(tmp, 0, l); + } + } + + public boolean isChunked() { + return true; + } + + public long getContentLength() { + // length of ungzipped content not known in advance + return -1; + } + +} diff --git a/source/net/yacy/cora/protocol/http/GzipRequestInterceptor.java 
b/source/net/yacy/cora/protocol/http/GzipRequestInterceptor.java new file mode 100644 index 000000000..752d938bd --- /dev/null +++ b/source/net/yacy/cora/protocol/http/GzipRequestInterceptor.java @@ -0,0 +1,46 @@ +/** + * GzipRequestInterceptor + * Copyright 2010 by Sebastian Gaebel + * First released 01.07.2010 at http://yacy.net + * + * $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $ + * $LastChangedRevision: 7001 $ + * $LastChangedBy: sixcooler $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */ + + +package net.yacy.cora.protocol.http; + +import java.io.IOException; + +import org.apache.http.HttpException; +import org.apache.http.HttpRequest; +import org.apache.http.HttpRequestInterceptor; +import org.apache.http.protocol.HttpContext; + +public class GzipRequestInterceptor implements HttpRequestInterceptor { + + private static final String ACCEPT_ENCODING = "Accept-Encoding"; + private static final String GZIP_CODEC = "gzip"; + + public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException { + if (!request.containsHeader(ACCEPT_ENCODING)) { + request.addHeader(ACCEPT_ENCODING, GZIP_CODEC); + } + } + +} diff --git a/source/net/yacy/cora/protocol/http/GzipResponseInterceptor.java b/source/net/yacy/cora/protocol/http/GzipResponseInterceptor.java new file mode 100644 index 000000000..da1919992 --- /dev/null +++ b/source/net/yacy/cora/protocol/http/GzipResponseInterceptor.java @@ -0,0 +1,63 @@ +/** + * GzipResponseInterceptor + * Copyright 2010 by Sebastian Gaebel + * First released 01.07.2010 at http://yacy.net + * + * $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $ + * $LastChangedRevision: 7001 $ + * $LastChangedBy: sixcooler $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . 
+ */ + + +package net.yacy.cora.protocol.http; + +import java.io.IOException; + + +import org.apache.http.Header; +import org.apache.http.HeaderElement; +import org.apache.http.HttpEntity; +import org.apache.http.HttpException; +import org.apache.http.HttpResponse; +import org.apache.http.HttpResponseInterceptor; +import org.apache.http.protocol.HttpContext; + +public class GzipResponseInterceptor implements HttpResponseInterceptor { + + private static final String GZIP_CODEC = "gzip"; + + public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException { + if (context == null) { + throw new IllegalArgumentException("HTTP context may not be null"); + } + HttpEntity entity = response.getEntity(); + if (entity != null) { + Header ceheader = entity.getContentEncoding(); + if (ceheader != null) { + HeaderElement[] codecs = ceheader.getElements(); + for (int i = 0; i < codecs.length; i++) { + if (codecs[i].getName().equalsIgnoreCase(GZIP_CODEC)) { +// response.removeHeader(ceheader); + response.setEntity(new GzipDecompressingEntity(response.getEntity())); + return; + } + } + } + } + } + +} diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java new file mode 100644 index 000000000..1fa35b690 --- /dev/null +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -0,0 +1,646 @@ +/** + * Client + * Copyright 2010 by Sebastian Gaebel + * First released 01.07.2010 at http://yacy.net + * + * $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $ + * $LastChangedRevision: 7020 $ + * $LastChangedBy: sixcooler $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + + +package net.yacy.cora.protocol.http; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.LinkedHashMap; +import java.util.Set; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; + +import net.yacy.cora.protocol.ConnectionInfo; + +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.HttpVersion; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.client.params.HttpClientParams; +import org.apache.http.conn.ClientConnectionManager; +import org.apache.http.conn.params.ConnManagerParams; +import org.apache.http.conn.params.ConnPerRouteBean; +import org.apache.http.conn.params.ConnRouteParams; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.conn.scheme.PlainSocketFactory; +import org.apache.http.conn.scheme.Scheme; +import org.apache.http.conn.scheme.SchemeRegistry; +import org.apache.http.conn.ssl.SSLSocketFactory; +import org.apache.http.entity.InputStreamEntity; +import org.apache.http.entity.mime.MultipartEntity; +import org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.entity.mime.content.StringBody; +import 
org.apache.http.impl.client.AbstractHttpClient; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager; +import org.apache.http.message.BasicHeader; +import org.apache.http.params.BasicHttpParams; +import org.apache.http.params.HttpConnectionParams; +import org.apache.http.params.HttpParams; +import org.apache.http.params.HttpProtocolParams; +import org.apache.http.protocol.BasicHttpContext; +import org.apache.http.protocol.HTTP; +import org.apache.http.protocol.HttpContext; +import org.apache.http.util.EntityUtils; + + +/** + * HttpClient implementation which uses HttpComponents Client {@link http://hc.apache.org/} + * + * @author sixcooler + * + */ +public class HTTPClient { + + private final static int maxcon = 20; + private static IdledConnectionEvictor idledConnectionEvictor = null; + private static HttpClient httpClient = null; + private Header[] headers = null; + private HttpResponse httpResponse = null; + private HttpUriRequest currentRequest = null; + private long upbytes = 0L; + private int timeout = 10000; + private String userAgent = null; + private String host = null; + private boolean redirecting = true; + private String realm = null; + + public HTTPClient() { + super(); + if (httpClient == null) { + initConnectionManager(); + } + } + + public static void setDefaultUserAgent(final String defaultAgent) { + HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent); + } + + private static void initConnectionManager() { + // Create and initialize HTTP parameters + final HttpParams httpParams = new BasicHttpParams(); + /** + * ConnectionManager settings + */ + // TODO: how much connections do we need? 
- default: 20 + ConnManagerParams.setMaxTotalConnections(httpParams, maxcon); + // for statistics same value should also be set here + ConnectionInfo.setMaxcount(maxcon); + // connections per host (2 default) + final ConnPerRouteBean connPerRoute = new ConnPerRouteBean(2); + // Increase max connections for localhost from the default 2 to maxcon; NOTE(review): the route is built from the bare host name without a port - confirm it matches requests to the local peer + HttpHost localhost = new HttpHost("localhost"); + connPerRoute.setMaxForRoute(new HttpRoute(localhost), maxcon); + ConnManagerParams.setMaxConnectionsPerRoute(httpParams, connPerRoute); + // how long to wait for getting a connection from manager in milliseconds + ConnManagerParams.setTimeout(httpParams, 3000L); + /** + * HTTP protocol settings + */ + HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1); + // UserAgent + HttpProtocolParams.setUserAgent(httpParams, "yacy (" + systemOST +") yacy.net"); + HttpProtocolParams.setUseExpectContinue(httpParams, true); + /** + * HTTP connection settings + */ + // timeout in milliseconds until a connection is established + HttpConnectionParams.setConnectionTimeout(httpParams, 10000); + // SO_LINGER affects the socket close operation in seconds + // HttpConnectionParams.setLinger(httpParams, 6); + // TODO: is default ok? 
+ // HttpConnectionParams.setSocketBufferSize(httpParams, 8192); + // SO_TIMEOUT: maximum period inactivity between two consecutive data packets in milliseconds + HttpConnectionParams.setSoTimeout(httpParams, 5000); + // getting an I/O error when executing a request over a connection that has been closed at the server side + HttpConnectionParams.setStaleCheckingEnabled(httpParams, true); + // conserve bandwidth by minimizing the number of segments that are sent + HttpConnectionParams.setTcpNoDelay(httpParams, false); + // TODO: testing noreuse - there will be HttpConnectionParams.setSoReuseaddr(HttpParams params, boolean reuseaddr) in core-4.1 + + // Create and initialize scheme registry + final SchemeRegistry schemeRegistry = new SchemeRegistry(); + schemeRegistry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80)); + schemeRegistry.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443)); + + ClientConnectionManager clientConnectionManager = new ThreadSafeClientConnManager(httpParams, schemeRegistry); + + httpClient = new DefaultHttpClient(clientConnectionManager, httpParams); + // ask for gzip + ((AbstractHttpClient) httpClient).addRequestInterceptor(new GzipRequestInterceptor()); + // uncompress gzip + ((AbstractHttpClient) httpClient).addResponseInterceptor(new GzipResponseInterceptor()); + + idledConnectionEvictor = new IdledConnectionEvictor(clientConnectionManager); + idledConnectionEvictor.start(); + + } + + /** + * This method should be called just before shutdown + * to stop the ConnectionManager and idledConnectionEvictor + * + * @throws InterruptedException + */ + public static void closeConnectionManager() throws InterruptedException { + if (idledConnectionEvictor != null) { + // Shut down the evictor thread + idledConnectionEvictor.shutdown(); + idledConnectionEvictor.join(); + } + if (httpClient != null) { + // Shut down the connection manager + httpClient.getConnectionManager().shutdown(); + } + + } + + /** + * 
This method sets the Header used for the request + * + * @param entrys to be set as request header + */ + public void setHeader(final Set> entrys) { + if (entrys != null) { + int i = 0; + headers = new Header[entrys.size()]; + for (final Entry entry : entrys) { + headers[i++] = new BasicHeader(entry.getKey(),entry.getValue()); + } + } + } + + /** + * This method sets the timeout of the Connection and Socket + * + * @param timeout in milliseconds + */ + public void setTimout(final int timeout) { + this.timeout = timeout; + } + + /** + * This method sets the UserAgent to be used for the request + * + * @param userAgent + */ + public void setUserAgent(final String userAgent) { + this.userAgent = userAgent; + } + + /** + * This method sets the host to be called at the request + * + * @param host + */ + public void setHost(final String host) { + this.host = host; + } + + /** + * This method sets if requests should follow redirects + * + * @param redirecting + */ + public void setRedirecting(final boolean redirecting) { + this.redirecting = redirecting; + } + + /** + * This method sets the authorization realm for the request + * + * @param realm + */ + public void setRealm(final String realm) { + this.realm = realm; + } + + /** + * This method GETs a page from the server. + * + * @param uri the url to get + * @return content bytes + * @throws IOException + */ + public byte[] GETbytes(final String uri) throws IOException { + return GETbytes(uri, Long.MAX_VALUE); + } + + /** + * This method GETs a page from the server. + * + * @param uri the url to get + * @param maxBytes to get + * @return content bytes + * @throws IOException + */ + public byte[] GETbytes(final String uri, long maxBytes) throws IOException { + final HttpGet httpGet = new HttpGet(uri); + return getContentBytes(httpGet, maxBytes); + } + + /** + * This method GETs a page from the server. + * to be used for streaming out + * Please take care to call finish()! 
+ * + * @param uri the url to get + * @throws IOException + */ + public void GET(final String uri) throws IOException { + if (currentRequest != null) throw new IOException("Client is in use!"); + final HttpGet httpGet = new HttpGet(uri); + currentRequest = httpGet; + execute(httpGet); + } + + /** + * This method gets HEAD response + * + * @param uri the url to Response from + * @return the HttpResponse + * @throws IOException + */ + public HttpResponse HEADResponse(final String uri) throws IOException { + final HttpHead httpHead = new HttpHead(uri); + execute(httpHead); + finish(); + ConnectionInfo.removeConnection(httpHead.hashCode()); + return httpResponse; + } + + /** + * This method POSTs a page from the server. + * to be used for streaming out + * Please take care to call finish()! + * + * @param uri the url to post + * @param instream the input to post + * @param length the contentlength + * @throws IOException + */ + public void POST(final String uri, final InputStream instream, long length) throws IOException { + if (currentRequest != null) throw new IOException("Client is in use!"); + final HttpPost httpPost = new HttpPost(uri); + final InputStreamEntity inputStreamEntity = new InputStreamEntity(instream, length); + // statistics + upbytes = length; + httpPost.setEntity(inputStreamEntity); + currentRequest = httpPost; + execute(httpPost); + } + + /** + * This method POSTs a page from the server. 
+ * + * @param uri the url to post + * @param parts to post + * @return content bytes + * @throws IOException + */ + public byte[] POSTbytes(final String uri, final LinkedHashMap parts) throws IOException { + final HttpPost httpPost = new HttpPost(uri); + + final MultipartEntity multipartEntity = new MultipartEntity(); + for (Entry part : parts.entrySet()) + multipartEntity.addPart(part.getKey(), part.getValue()); + // statistics + upbytes = multipartEntity.getContentLength(); + + httpPost.setEntity(multipartEntity); + + return getContentBytes(httpPost, Long.MAX_VALUE); + } + + /** + * + * @return HttpResponse from call + */ + public HttpResponse getHttpResponse() { + return httpResponse; + } + + /** + * + * @return status code from http request + */ + public int getStatusCode() { + return httpResponse.getStatusLine().getStatusCode(); + } + + /** + * This method gets direct access to the content-stream + * Since this way is uncontrolled by the Client think of using 'writeTo' instead! + * Please take care to call finish()! + * + * @return the content as InputStream + * @throws IOException + */ + public InputStream getContentstream() throws IOException { + if (httpResponse != null && currentRequest != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) try { + return httpEntity.getContent(); + } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + throw e; + } + } + return null; + } + + /** + * This method streams the content to the outputStream + * Please take care to call finish()! 
+ * + * @param outputStream + * @throws IOException + */ + public void writeTo(final OutputStream outputStream) throws IOException { + if (httpResponse != null && currentRequest != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) try { + httpEntity.writeTo(outputStream); + outputStream.flush(); + // TODO: The name of this method is misnomer. + // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest = null; + } catch (final IOException e) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + throw e; + } + } + } + + /** + * This method ensures correct finish of client-connections + * This method should be used after every use of GET or POST and writeTo or getContentstream! + * + * @throws IOException + */ + public void finish() throws IOException { + if (httpResponse != null) { + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null && httpEntity.isStreaming()) { + // TODO: The name of this method is misnomer. + // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + } + } + if (currentRequest != null) { + ConnectionInfo.removeConnection(currentRequest.hashCode()); + currentRequest.abort(); + currentRequest = null; + } + } + + private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final long maxBytes) throws IOException { + byte[] content = null; + try { + execute(httpUriRequest); + if (httpResponse == null) return null; + // get the response body + final HttpEntity httpEntity = httpResponse.getEntity(); + if (httpEntity != null) { + if (httpEntity.getContentLength() < maxBytes) { + content = EntityUtils.toByteArray(httpEntity); + } + // TODO: The name of this method is misnomer. 
+ // It will be renamed to #finish() in the next major release of httpcore + httpEntity.consumeContent(); + } + } catch (final IOException e) { + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); + throw e; + } + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + return content; + } + + private void execute(final HttpUriRequest httpUriRequest) throws IOException { + final HttpContext httpContext = new BasicHttpContext(); + setHeaders(httpUriRequest); + setParams(httpUriRequest.getParams()); + setProxy(httpUriRequest.getParams()); + // statistics + storeConnectionInfo(httpUriRequest); + try { + // execute the method + httpResponse = httpClient.execute(httpUriRequest, httpContext); + } catch (ClientProtocolException e) { + ConnectionInfo.removeConnection(httpUriRequest.hashCode()); + httpUriRequest.abort(); + throw new IOException("Client can't execute: " + e.getMessage(), e); // keep the protocol error as the cause + } + } + + private void setHeaders(final HttpUriRequest httpUriRequest) { + if (headers != null) { + for (Header header : headers) { + httpUriRequest.addHeader(header); + } + } + if (realm != null) + httpUriRequest.setHeader("Authorization", "realm=" + realm); + } + + private void setParams(final HttpParams httpParams) { + HttpClientParams.setRedirecting(httpParams, redirecting); + HttpConnectionParams.setConnectionTimeout(httpParams, timeout); + HttpConnectionParams.setSoTimeout(httpParams, timeout); + if (userAgent != null) + HttpProtocolParams.setUserAgent(httpParams, userAgent); + if (host != null) + httpParams.setParameter(HTTP.TARGET_HOST, host); + } + + private void setProxy(final HttpParams httpParams) { + if (ProxySettings.use) + ConnRouteParams.setDefaultProxy(httpParams, ProxySettings.getProxyHost()); + // TODO find a better way for this + ProxySettings.setProxyCreds((AbstractHttpClient) httpClient); + } + + private void storeConnectionInfo(final HttpUriRequest httpUriRequest) { + final int port = httpUriRequest.getURI().getPort(); + final 
String thost = httpUriRequest.getURI().getHost(); + ConnectionInfo.addConnection(new ConnectionInfo( + httpUriRequest.getURI().getScheme(), + (port < 0 || port == 80) ? thost : thost + ":" + port, // getPort() is -1 when the URI has no explicit port + httpUriRequest.getMethod() + " " + httpUriRequest.getURI().getPath(), + httpUriRequest.hashCode(), + System.currentTimeMillis(), + upbytes)); + } + + /** + * provide system information for client identification + */ + private static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " + + System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") + + "; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation(); + + /** + * generating the location string + * + * @return the user.timezone property cut at its first '/', followed by '/' and the user.language property + */ + public static String generateLocation() { + String loc = System.getProperty("user.timezone", "nowhere"); + final int p = loc.indexOf('/'); + if (p > 0) { + loc = loc.substring(0, p); + } + loc = loc + "/" + System.getProperty("user.language", "dumb"); + return loc; + } + + /** + * @return the systemOST + */ + public static String getSystemOST() { + return systemOST; + } + + /** + * testing + * + * @param args urls to test + */ + public static void main(final String[] args) { + String url = null; + // prepare Parts + final LinkedHashMap newparts = new LinkedHashMap(); + try { + newparts.put("foo", new StringBody("FooBar")); + newparts.put("bar", new StringBody("BarFoo")); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + HTTPClient client = new HTTPClient(); + client.setUserAgent("foobar"); + client.setRedirecting(false); + // Get some + for (int i = 0; i < args.length; i++) { + url = args[i]; + if (!url.toUpperCase().startsWith("HTTP://")) { + url = "http://" + url; + } + try { + System.out.println(new String(client.GETbytes(url))); + } catch (IOException e) { + e.printStackTrace(); + } + } + // Head some +// try { +// client.HEADResponse(url); +// } catch (IOException 
e) { +// e.printStackTrace(); +// } + for (Header header: client.getHttpResponse().getAllHeaders()) { + System.out.println("Header " + header.getName() + " : " + header.getValue()); +// for (HeaderElement element: header.getElements()) +// System.out.println("Element " + element.getName() + " : " + element.getValue()); + } + System.out.println(client.getHttpResponse().getLocale()); + System.out.println(client.getHttpResponse().getProtocolVersion()); + System.out.println(client.getHttpResponse().getStatusLine()); + // Post some +// try { +// System.out.println(new String(client.POSTbytes(url, newparts))); +// } catch (IOException e1) { +// e1.printStackTrace(); +// } + // Close out connection manager + try { + HTTPClient.closeConnectionManager(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + + /** + * + * @see: http://hc.apache.org/httpcomponents-client-4.0.1/tutorial/html/connmgmt.html#d4e638 + * + */ + public static class IdledConnectionEvictor extends Thread { + + private final ClientConnectionManager clientConnectionManager; + + private volatile boolean shutdown; + + public IdledConnectionEvictor(ClientConnectionManager clientConnectionManager) { + super(); + this.clientConnectionManager = clientConnectionManager; + } + + @Override + public void run() { + try { + while (!shutdown) { + synchronized (this) { + wait(5000); + // Close expired connections + clientConnectionManager.closeExpiredConnections(); + // Optionally, close connections + // that have been idle longer than 5 sec + // (some SOHO router act strange on >5sec idled connections) + clientConnectionManager.closeIdleConnections(5, TimeUnit.SECONDS); + } + } + } catch (InterruptedException ex) { + // terminate + } + } + + public void shutdown() { + shutdown = true; + synchronized (this) { + notifyAll(); + } + } + + } + +} diff --git a/source/net/yacy/cora/protocol/http/ProxySettings.java b/source/net/yacy/cora/protocol/http/ProxySettings.java new file mode 100644 index 
000000000..3d6e729ce --- /dev/null +++ b/source/net/yacy/cora/protocol/http/ProxySettings.java @@ -0,0 +1,97 @@ +/** + * ProxySettings + * Copyright 2010 by Michael Peter Christen + * First released 25.05.2010 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +package net.yacy.cora.protocol.http; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.httpclient.HostConfiguration; +import org.apache.commons.httpclient.HttpClient; +import org.apache.http.HttpHost; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.impl.client.AbstractHttpClient; + +/** + * settings for a remote proxy + * + */ +public final class ProxySettings { + + // Dummy value to associate with an Object in the backing Map + private static final Object PRESENT = new Object(); + + public static boolean use = false, use4YaCy = false, use4ssl = false; + public static String host = null, user = "", password = ""; + public static int port = 0; + public static String[] noProxy = null; + public static final Map allowProxy = new ConcurrentHashMap(); + public static final Map disallowProxy = new ConcurrentHashMap(); + + /** + * produce a HostConfiguration (apache object) with the proxy access information included + * @param apacheHttpClient + * 
@return a host configuration with proxy config if the proxy shall be used; null otherwise + */ + public static HostConfiguration getProxyHostConfig(HttpClient apacheHttpClient) { + final HostConfiguration hostConfig; + if (!use) return null; + hostConfig = new HostConfiguration(apacheHttpClient.getHostConfiguration()); + hostConfig.setProxy(host, port); + return hostConfig; + } + + /** + * + * @return the HttpHost to be used as proxy + */ + public static HttpHost getProxyHost() { + if (!use) return null; + return new HttpHost(host, port); + } + + public static void setProxyCreds(AbstractHttpClient httpClient) { + if (!use) return; + httpClient.getCredentialsProvider().setCredentials( + new AuthScope(host, port), + new UsernamePasswordCredentials(user, password)); + } + + /** + * tell if a remote proxy will be used for the given host + * @param host the host name that is matched against the noProxy patterns + * @return true, if the proxy shall be used for the given host + */ + public static boolean useForHost(final String host) { + if (!use) return false; + if (allowProxy.containsKey(host)) return true; + if (disallowProxy.containsKey(host)) return false; + if (noProxy != null) for (String pattern: noProxy) { + if (host.matches(pattern)) { + disallowProxy.put(host, PRESENT); + return false; + } + } + allowProxy.put(host, PRESENT); + return true; + } + +} diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 232bff3ea..5f040a618 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -46,7 +46,7 @@ import java.util.concurrent.Semaphore; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import net.yacy.cora.protocol.Client; +import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.gui.YaCyApp; import net.yacy.gui.framework.Browser; import net.yacy.kelondro.blob.MapDataMining; @@ -298,9 +298,9 @@ public final class yacy { // set user-agent final String userAgent = "yacy/" + Double.toString(version) + " (www.yacy.net; " - + Client.getSystemOST() + ")"; + + 
HTTPClient.getSystemOST() + ")"; // Client.setUserAgent(userAgent); - Client.setDefaultUserAgent(userAgent); + HTTPClient.setDefaultUserAgent(userAgent); // start main threads final String port = sb.getConfig("port", "8080"); @@ -561,7 +561,7 @@ public final class yacy { final RequestHeader requestHeader = new RequestHeader(); requestHeader.put(RequestHeader.AUTHORIZATION, "realm=" + encodedPassword); // for http-authentify // final Client con = new Client(10000, requestHeader); - final Client con = new Client(); + final HTTPClient con = new HTTPClient(); con.setHeader(requestHeader.entrySet()); // ResponseContainer res = null; try {