From 015d044c2572ce1665c842c4d4514eb4f323842f Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 10 May 2006 16:01:14 +0000 Subject: [PATCH] tried to fix some problems with latest changes to httpc very experimental! git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2078 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ViewFile.java | 2 +- source/de/anomic/data/robotsParser.java | 4 +- source/de/anomic/http/httpc.java | 132 ++++++++---------- source/de/anomic/http/httpdProxyHandler.java | 5 +- .../anomic/plasma/parser/odt/odtParser.java | 2 +- .../anomic/plasma/parser/rpm/rpmParser.java | 2 +- .../anomic/plasma/parser/vcf/vcfParser.java | 2 +- .../de/anomic/plasma/plasmaCrawlWorker.java | 4 +- source/de/anomic/plasma/plasmaParser.java | 2 +- .../anomic/urlRedirector/urlRedirectord.java | 2 +- source/de/anomic/yacy/yacyPeerActions.java | 2 +- source/yacy.java | 4 +- 12 files changed, 78 insertions(+), 85 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 7ffaa1b3b..7259c109c 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -144,7 +144,7 @@ public class ViewFile { if (resHeader == null) { resHeader = sb.cacheManager.getCachedResponse(urlEntry.hash()); if (resHeader == null) { - resHeader = httpc.whead(url,5000,null,null,sb.remoteProxyConfig); + resHeader = httpc.whead(url,url.getHost(),5000,null,null,sb.remoteProxyConfig); if (resource == null) { prop.put("error",4); prop.put("viewMode",VIEW_MODE_NO_TEXT); diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java index 43c1b930e..5ac920697 100644 --- a/source/de/anomic/data/robotsParser.java +++ b/source/de/anomic/data/robotsParser.java @@ -309,9 +309,9 @@ public final class robotsParser{ (sb.remoteProxyConfig == null) || (!sb.remoteProxyConfig.useProxy()) ) { - con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https")); + con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https")); } else { - con = httpc.getInstance(robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig); + con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig); } // if we previously have downloaded this robots.txt then we can set the if-modified-since header diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index a74663cec..7aea4beeb 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -135,8 +135,9 @@ public final class httpc { // class variables private Socket socket = null; // client socket for commands private Thread socketOwner = null; - private String host = null; - //private long timeout; + private String adressed_host = null; + private int adressed_port = 80; + private String target_virtual_host = null; // output and input streams for client control connection PushbackInputStream clientInput = null; @@ -146,12 +147,10 @@ public final class httpc { private httpdByteCountOutputStream clientOutputByteCount = null; private boolean remoteProxyUse = false; - private String savedRemoteHost = null; private httpRemoteProxyConfig remoteProxyConfig = null; String requestPath = null; private boolean allowContentEncoding = true; - static boolean useYacyReferer = true; public static boolean yacyDebugMode = false; /** @@ -249,8 +248,8 @@ public final class httpc { * Convert the status of this class into an String object to output it. */ public String toString() { - return (this.savedRemoteHost == null) ? "Disconnected" : "Connected to " + this.savedRemoteHost + - ((this.remoteProxyUse) ? " via " + this.host : ""); + return (this.adressed_host == null) ? "Disconnected" : "Connected to " + this.adressed_host + + ((this.remoteProxyUse) ? " via " + adressed_host : ""); } /** @@ -269,6 +268,7 @@ public final class httpc { */ public static httpc getInstance( String server, + String vhost, int port, int timeout, boolean ssl, @@ -289,6 +289,7 @@ public final class httpc { try { newHttpc.init( server, + vhost, port, timeout, ssl, @@ -305,21 +306,23 @@ public final class httpc { public static httpc getInstance( String server, + String vhost, int port, int timeout, boolean ssl, httpRemoteProxyConfig remoteProxyConfig ) throws IOException { - return getInstance(server,port,timeout,ssl,remoteProxyConfig,null,null); + return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null); } public static httpc getInstance( String server, + String vhost, int port, int timeout, boolean ssl ) throws IOException { - return getInstance(server,port,timeout,ssl,null,null); + return getInstance(server,vhost,port,timeout,ssl,null,null); } @@ -336,6 +339,7 @@ public final class httpc { */ public static httpc getInstance( String server, + String vhost, int port, int timeout, boolean ssl, @@ -354,7 +358,7 @@ public final class httpc { // initialize it try { - newHttpc.init(server,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting); + newHttpc.init(server,vhost,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting); } catch (IOException e) { try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {} throw e; @@ -499,6 +503,7 @@ public final class httpc { */ void init( String server, + String vhost, int port, int timeout, boolean ssl, @@ -514,10 +519,12 @@ public final class httpc { String remoteProxyHost = theRemoteProxyConfig.getProxyHost(); int remoteProxyPort = theRemoteProxyConfig.getProxyPort(); - this.init(remoteProxyHost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting); + this.init(remoteProxyHost, vhost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting); this.remoteProxyUse = true; - this.savedRemoteHost = server + ((port == 80) ? "" : (":" + port)); + this.adressed_host = server; + this.adressed_port = port; + this.target_virtual_host = vhost; this.remoteProxyConfig = theRemoteProxyConfig; } @@ -532,7 +539,8 @@ public final class httpc { * @throws IOException */ void init( - String server, + String server, + String vhost, int port, int timeout, boolean ssl, @@ -542,15 +550,15 @@ public final class httpc { //serverLog.logDebug("HTTPC", handle + " initialized"); this.remoteProxyUse = false; //this.timeout = timeout; - //if(yacyDebugMode){ this.timeout=60000; } - this.savedRemoteHost = server; try { if (port == -1) { port = (ssl)? 443 : 80; } - this.host = server + ((port == 80) ? "" : (":" + port)); + this.adressed_host = server; + this.adressed_port = port; + this.target_virtual_host = vhost; InetAddress hostip; // if ((server.equals("localhost")) || (server.equals("127.0.0.1")) || (server.startsWith("192.168.")) || (server.startsWith("10."))) { // hostip = server; @@ -640,12 +648,12 @@ public final class httpc { this.clientOutputByteCount = null; } - this.host = null; + this.adressed_host = null; + this.target_virtual_host = null; //this.timeout = 0; this.remoteProxyUse = false; this.remoteProxyConfig = null; - this.savedRemoteHost = null; this.requestPath = null; this.allowContentEncoding = true; @@ -715,10 +723,11 @@ public final class httpc { // set the host attribute. This is in particular necessary, if we contact another proxy // the host is mandatory, if we use HTTP/1.1 if (!(header.containsKey(httpHeader.HOST))) { - if (this.remoteProxyUse) - header.put(httpHeader.HOST, this.savedRemoteHost); - else - header.put(httpHeader.HOST, this.host); + if (this.remoteProxyUse) { + header.put(httpHeader.HOST, this.adressed_host); + } else { + header.put(httpHeader.HOST, this.target_virtual_host); + } } if (this.remoteProxyUse) { @@ -759,7 +768,7 @@ public final class httpc { // send request if ((this.remoteProxyUse) && (!(method.equals(httpHeader.METHOD_CONNECT)))) - path = (this.savedRemoteHost.endsWith("443")?"https://":"http://") + this.savedRemoteHost + path; + path = ((this.adressed_port == 443) ? "https://" : "http://") + this.adressed_host + ":" + this.adressed_port + path; serverCore.send(this.clientOutput, method + " " + path + " HTTP/1.0"); // if set to HTTP/1.1, servers give time-outs? // send header @@ -1044,7 +1053,8 @@ do upload */ public static byte[] singleGET( - String host, + String realhost, + String virtualhost, int port, String path, int timeout, @@ -1063,11 +1073,10 @@ do upload httpc con = null; try { - if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) { - con = httpc.getInstance(host, port, timeout, ssl); + con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl); } else { - con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); + con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig); } httpc.response res = con.GET(path, requestHeader); @@ -1084,7 +1093,8 @@ do upload } public static byte[] singleGET( - URL u, + URL u, + String vhost, int timeout, String user, String password, @@ -1096,7 +1106,7 @@ do upload String path = u.getPath(); String query = u.getQuery(); if ((query != null) && (query.length() > 0)) path = path + "?" + query; - return singleGET(u.getHost(), port, path, timeout, user, password, ssl, theRemoteProxyConfig, null); + return singleGET(u.getHost(), vhost, port, path, timeout, user, password, ssl, theRemoteProxyConfig, null); } /* @@ -1110,7 +1120,8 @@ do upload */ public static byte[] singlePOST( - String host, + String realhost, + String virtualhost, int port, String path, int timeout, @@ -1131,9 +1142,9 @@ do upload httpc con = null; try { if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) { - con = httpc.getInstance(host, port, timeout, ssl); + con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl); } else { - con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); + con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig); } httpc.response res = con.POST(path, requestHeader, props, files); @@ -1152,7 +1163,7 @@ do upload public static byte[] singlePOST( URL u, - String host, + String vhost, int timeout, String user, String password, @@ -1167,7 +1178,8 @@ do upload String query = u.getQuery(); if ((query != null) && (query.length() > 0)) path = path + "?" + query; return singlePOST( - u.getHost(), + u.getHost(), + vhost, port, path, timeout, @@ -1205,13 +1217,13 @@ do upload public static ArrayList wget( URL url, - String host, + String vhost, int timeout, String user, String password, httpRemoteProxyConfig theRemoteProxyConfig ) throws IOException { - return wget(url, host,timeout,user,password,theRemoteProxyConfig,null); + return wget(url, vhost,timeout,user,password,theRemoteProxyConfig,null); } public static ArrayList wget(URL url) throws IOException{ @@ -1220,7 +1232,7 @@ do upload public static ArrayList wget( URL url, - String host, + String vhost, int timeout, String user, String password, @@ -1237,7 +1249,8 @@ do upload // splitting of the byte array into lines byte[] a = singleGET( - host, + url.getHost(), + vhost, port, path, timeout, @@ -1286,17 +1299,19 @@ do upload } public static httpHeader whead( - URL url, + URL url, + String vhost, int timeout, String user, String password, httpRemoteProxyConfig theRemoteProxyConfig ) throws IOException { - return whead(url,timeout,user,password,theRemoteProxyConfig,null); + return whead(url,vhost,timeout,user,password,theRemoteProxyConfig,null); } public static httpHeader whead( - URL url, + URL url, + String vhost, int timeout, String user, String password, @@ -1316,14 +1331,14 @@ do upload String path = url.getPath(); String query = url.getQuery(); if ((query != null) && (query.length() > 0)) path = path + "?" + query; - String host = url.getHost(); + String realhost = url.getHost(); // start connection httpc con = null; try { if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) - con = httpc.getInstance(host, port, timeout, ssl); - else con = httpc.getInstance(host, port, timeout, ssl, theRemoteProxyConfig); + con = httpc.getInstance(realhost, vhost, port, timeout, ssl); + else con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig); httpc.response res = con.HEAD(path, requestHeader); if (res.status.startsWith("2")) { @@ -1339,21 +1354,9 @@ do upload } } - /* - public static Vector wget(String url) { - try { - return wget(new URL(url), 5000, null, null, null, 0); - } catch (IOException e) { - Vector ll = new Vector(); - ll.add("503 " + e.getMessage()); - return ll; - } - } - */ - public static ArrayList wput( URL url, - String host, + String vhost, int timeout, String user, String password, @@ -1364,7 +1367,7 @@ do upload // splitting of the byte array into lines byte[] a = singlePOST( url, - host, + vhost, timeout, user, password, @@ -1385,19 +1388,6 @@ do upload return v; } - /* - public static Vector wput(String url, serverObjects props) { - try { - return wput(url, 5000, null, null, null, 0, props); - } catch (IOException e) { - serverLog.logError("HTTPC", "wput exception for URL " + url + ": " + e.getMessage(), e); - Vector ll = new Vector(); - ll.add("503 " + e.getMessage()); - return ll; - } - } - */ - public static void main(String[] args) { System.out.println("ANOMIC.DE HTTP CLIENT v" + vDATE); String url = args[0]; @@ -1692,7 +1682,7 @@ do upload if (p > 0) { this.responseHeader.add(buffer.substring(0, p).trim(), buffer.substring(p + 1).trim()); } else { - serverLog.logSevere("HTTPC", "RESPONSE PARSE ERROR: HOST='" + httpc.this.host + "', PATH='" + httpc.this.requestPath + "', STATUS='" + this.status + "'"); + serverLog.logSevere("HTTPC", "RESPONSE PARSE ERROR: HOST='" + httpc.this.adressed_host + "', PATH='" + httpc.this.requestPath + "', STATUS='" + this.status + "'"); serverLog.logSevere("HTTPC", "..............BUFFER: " + buffer); throw new IOException(this.status); } diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index ab9ca7b94..d54aad270 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -1139,6 +1139,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt httpc remoteProxy = null; try { remoteProxy = httpc.getInstance( + host, host, port, timeout, @@ -1279,6 +1280,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt if (useProxy) { return httpc.getInstance( server, + server, port, timeout, false, @@ -1286,7 +1288,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt ); } return httpc.getInstance( - server, + server, + server, port, timeout, false diff --git a/source/de/anomic/plasma/parser/odt/odtParser.java b/source/de/anomic/plasma/parser/odt/odtParser.java index f029df9d7..7a5756133 100644 --- a/source/de/anomic/plasma/parser/odt/odtParser.java +++ b/source/de/anomic/plasma/parser/odt/odtParser.java @@ -202,7 +202,7 @@ public class odtParser extends AbstractParser implements Parser { testParser.setLogger(new serverLog("PARSER.ODT")); // downloading the document content - byte[] content = httpc.singleGET(contentUrl, 10000, null, null, null); + byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null); ByteArrayInputStream input = new ByteArrayInputStream(content); // parsing the document diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index 354a91e6c..095f01723 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -171,7 +171,7 @@ public class rpmParser extends AbstractParser implements Parser { URL contentUrl = new URL(args[0]); rpmParser testParser = new rpmParser(); - byte[] content = httpc.singleGET(contentUrl, 10000, null, null, null); + byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null); ByteArrayInputStream input = new ByteArrayInputStream(content); testParser.parse(contentUrl, "application/x-rpm", input); } catch (Exception e) { diff --git a/source/de/anomic/plasma/parser/vcf/vcfParser.java b/source/de/anomic/plasma/parser/vcf/vcfParser.java index 32d63a32c..991988d62 100644 --- a/source/de/anomic/plasma/parser/vcf/vcfParser.java +++ b/source/de/anomic/plasma/parser/vcf/vcfParser.java @@ -260,7 +260,7 @@ public class vcfParser extends AbstractParser implements Parser { URL contentUrl = new URL(args[0]); vcfParser testParser = new vcfParser(); - byte[] content = httpc.singleGET(contentUrl, 10000, null, null, null); + byte[] content = httpc.singleGET(contentUrl, contentUrl.getHost(), 10000, null, null, null); ByteArrayInputStream input = new ByteArrayInputStream(content); testParser.parse(contentUrl, "text/x-vcard", input); } catch (Exception e) { diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index a756fdb6e..6b22d0205 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -345,8 +345,8 @@ public final class plasmaCrawlWorker extends Thread { // open the connection remote = ((theRemoteProxyConfig != null) && (theRemoteProxyConfig.useProxy())) - ? httpc.getInstance(host, port, socketTimeout, ssl, theRemoteProxyConfig,"CRAWLER",null) - : httpc.getInstance(host, port, socketTimeout, ssl, "CRAWLER",null); + ? httpc.getInstance(host, host, port, socketTimeout, ssl, theRemoteProxyConfig,"CRAWLER",null) + : httpc.getInstance(host, host, port, socketTimeout, ssl, "CRAWLER",null); // specifying if content encoding is allowed remote.setAllowContentEncoding(useContentEncodingGzip); diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index 154c8721b..678d8e168 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -704,7 +704,7 @@ public final class plasmaParser { contentURL = new URL(args[1]); // downloading the document content - byte[] contentBytes = httpc.singleGET(contentURL, 10000, null, null, null); + byte[] contentBytes = httpc.singleGET(contentURL, contentURL.getHost(), 10000, null, null, null); contentFile = File.createTempFile("content",".tmp"); contentFile.deleteOnExit(); diff --git a/source/de/anomic/urlRedirector/urlRedirectord.java b/source/de/anomic/urlRedirector/urlRedirectord.java index e9c4679b4..744e1e642 100644 --- a/source/de/anomic/urlRedirector/urlRedirectord.java +++ b/source/de/anomic/urlRedirector/urlRedirectord.java @@ -181,7 +181,7 @@ public class urlRedirectord implements serverHandler { URL reqURL = new URL(this.nextURL); // getting URL mimeType - httpHeader header = httpc.whead(reqURL, 10000, null, null, switchboard.remoteProxyConfig); + httpHeader header = httpc.whead(reqURL, reqURL.getHost(), 10000, null, null, switchboard.remoteProxyConfig); if (plasmaParser.supportedContent( plasmaParser.PARSER_MODE_URLREDIRECTOR, diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index 06e6f8af0..8d8d4b490 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -183,7 +183,7 @@ public class yacyPeerActions { reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache"); url = new URL(seedListFileURL); - header = httpc.whead(url, this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader); + header = httpc.whead(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader); if ((header == null) || (header.lastModified() == null)) { yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available"); } else if ((header.age() > 86400000) && (ssc > 0)) { diff --git a/source/yacy.java b/source/yacy.java index faafc91fa..63d08bc6b 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -573,7 +573,7 @@ public final class yacy { httpHeader requestHeader = new httpHeader(); requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify try { - httpc con = httpc.getInstance("localhost", port, 10000, false); + httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false); httpc.response res = con.GET("Steering.html?shutdown=", requestHeader); // read response @@ -1323,7 +1323,7 @@ public final class yacy { URL newUrl = new URL(newUrlStr); // doing a http head request to test if the url is correct - theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getPort(), 30000, false); + theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false); response res = theHttpc.HEAD(newUrl.getPath(), null); if (res.statusCode == 200) {