diff --git a/htroot/IndexCreateQueues_p.java b/htroot/IndexCreateQueues_p.java index 3adf3fa04..393889daf 100644 --- a/htroot/IndexCreateQueues_p.java +++ b/htroot/IndexCreateQueues_p.java @@ -14,6 +14,7 @@ import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.CrawlSwitchboard; @@ -141,8 +142,7 @@ public class IndexCreateQueues_p { int hc = 0; for (Map.Entry host: hosts.entrySet()) { String hostnameport = host.getKey(); - int p = hostnameport.lastIndexOf(':'); - String hostname = p < 0 ? hostnameport : hostnameport.substring(0, p); + String hostname = Domains.stripToHostName(hostnameport); prop.putHTML("crawler_host_" + hc + "_hostnameport", hostnameport); prop.putHTML("crawler_host_" + hc + "_hostname", hostname); prop.put("crawler_host_" + hc + "_embed", embed ? 1 : 0); diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java index 6e1adb458..3b8eab40c 100644 --- a/htroot/Settings_p.java +++ b/htroot/Settings_p.java @@ -108,7 +108,7 @@ public final class Settings_p { } else { prop.put("use_proxyAccounts", "1"); //checked /*s = env.getConfig("proxyAccount", "proxy:void"); - pos = s.indexOf(":"); + pos = s.indexOf(':'); if (pos < 0) { prop.put("proxyuser","proxy"); } else { diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 9caa078c2..d1077d0ec 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -155,7 +155,7 @@ public class MultiProtocolURL implements Serializable, Comparable 0) target = target.substring(p + 3); + p = target.indexOf('/'); + if (p > 0) target = target.substring(0, p); + + // IPv4 / host heuristics + p = target.lastIndexOf(':'); + if ( p < 0 ) { + // may be IPv4 or IPv6, we chop off brackets if exist + if (target.charAt(0) == '[') target = target.substring(1); + if (target.charAt(target.length() - 1) == ']') target = target.substring(0, target.length() - 1); + return target; + } + + // the ':' at pos p may be either a port divider or a part of an IPv6 address + if (target.charAt(p - 1) == ']') { + target = target.substring(1, p - 1); + return target; + } + + // the ':' must be a port divider + target = target.substring(0, p); + return target; + } + + public static int stripToPort(String target) { + int port = 80; // default port + + // normalize + if (target == null || target.isEmpty()) return port; + target = target.toLowerCase().trim(); // we can lowercase this because host names are case-insensitive + + // extract the address (host:port) part (applies if this is an url) + int p = target.indexOf("://"); + if (p > 0) { + String protocol = target.substring(0, p); + target = target.substring(p + 3); + if ("https".equals(protocol)) port = 443; + if ("ftp".equals(protocol)) port = 21; + if ("smb".equals(protocol)) port = 445; + } + p = target.indexOf('/'); + if (p > 0) target = target.substring(0, p); + + // IPv4 / host heuristics + p = target.lastIndexOf(':'); + if ( p < 0 ) return port; + + // the ':' must be a port divider + port = Integer.parseInt(target.substring(p + 1)); + return port; + } + /** * resolve a host address using a local DNS cache and a DNS lookup if necessary * @param clienthost * @return the hosts InetAddress or null if the address cannot be resolved */ public static InetAddress dnsResolve(final String host0) { + // consider to call stripToHostName() before calling this if (host0 == null || host0.isEmpty()) return null; final String host = host0.toLowerCase().trim(); diff --git a/source/net/yacy/cora/protocol/HeaderFramework.java b/source/net/yacy/cora/protocol/HeaderFramework.java index 969ccaff2..e7886b444 100644 --- a/source/net/yacy/cora/protocol/HeaderFramework.java +++ b/source/net/yacy/cora/protocol/HeaderFramework.java @@ -574,7 +574,7 @@ public class HeaderFramework extends TreeMap implements Map= 0) { - newHost = hostPort.substring(0, posPort); - newPort = Integer.parseInt(hostPort.substring(posPort + 1)); - } else { - newHost = hostPort; - newPort = 80; - } + int newPort = Domains.stripToPort(hostPort); + String newHost = Domains.stripToHostName(hostPort); if (alternativeResolvers.myIPs().contains(newHost)) return; if (Domains.isLocal(newHost, null)) return; RequestDispatcher dispatcher = request.getRequestDispatcher(path + target); diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 88aad5073..b465e5c95 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -232,12 +232,8 @@ public final class Protocol { } else { try { // patch the remote peer address to avoid that remote peers spoof the network with wrong addresses - final int p = targetAddress.lastIndexOf(':'); - if ( p < 0 ) return null; - String h = targetAddress.substring(0, p); - if (h.charAt(0) == '[') h = h.substring(1); - if (h.charAt(h.length() - 1) == ']') h = h.substring(0, h.length() - 1); - InetAddress ie = Domains.dnsResolve(h); + String host = Domains.stripToHostName(targetAddress); + InetAddress ie = Domains.dnsResolve(host); otherPeer = Seed.genRemoteSeed(seed, false, ie.getHostAddress()); if ( !otherPeer.hash.equals(targetHash) ) { Network.log.info("yacyClient.hello: consistency error: otherPeer.hash = " + otherPeer.hash + ", otherHash = " + targetHash); @@ -340,13 +336,10 @@ public final class Protocol { } else { try { if ( i == 1 ) { - final int p = targetAddress.lastIndexOf(':'); - if ( p < 0 ) { - return null; - } - InetAddress ia = Domains.dnsResolve(targetAddress.substring(0, p)); + String host = Domains.stripToHostName(targetAddress); + InetAddress ia = Domains.dnsResolve(host); if (ia == null) continue; - final String host = ia.getHostAddress(); // the actual address of the target as we had been successful when contacting them is patched here + host = ia.getHostAddress(); // the actual address of the target as we had been successful when contacting them is patched here s = Seed.genRemoteSeed(seedStr, false, host); } else { s = Seed.genRemoteSeed(seedStr, false, null); diff --git a/source/net/yacy/server/http/HTTPDProxyHandler.java b/source/net/yacy/server/http/HTTPDProxyHandler.java index 65e5760e6..ff8dcdfa8 100644 --- a/source/net/yacy/server/http/HTTPDProxyHandler.java +++ b/source/net/yacy/server/http/HTTPDProxyHandler.java @@ -423,13 +423,8 @@ public final class HTTPDProxyHandler { final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // the ip from the connecting peer - int port, pos; - if ((pos = host.indexOf(':')) < 0) { - port = 80; - } else { - port = Integer.parseInt(host.substring(pos + 1)); - host = host.substring(0, pos); - } + int port = Domains.stripToPort(host); + host = Domains.stripToHostName(host); // resolve yacy and yacyh domains String yAddress = resolveYacyDomains(host); @@ -438,10 +433,10 @@ public final class HTTPDProxyHandler { final String remotePath = (args == null) ? path : (path + "?" + args); // with leading '/' // remove yacy-subdomain-path, when accessing /env - if ( (yAddress != null) + if ((yAddress != null) && (remotePath.startsWith("/env")) - && ((pos = yAddress.indexOf('/')) != -1) - ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); + && (yAddress.indexOf('/') != -1) + ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); modifyProxyHeaders(requestHeader, httpVer); @@ -1050,11 +1045,8 @@ public final class HTTPDProxyHandler { String orgHostName = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); if (orgHostName == null) orgHostName = "unknown"; orgHostName = orgHostName.toLowerCase(); - int pos = orgHostName.indexOf(':'); - if (pos != -1) { - orgHostPort = orgHostName.substring(pos+1); - orgHostName = orgHostName.substring(0,pos); - } + orgHostPort = Integer.toString(Domains.stripToPort(orgHostName)); + orgHostName = Domains.stripToHostName(orgHostName); String orgHostPath = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); if (orgHostPath == null) orgHostPath = ""; String orgHostArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); if (orgHostArgs == null) orgHostArgs = ""; if (orgHostArgs.length() > 0) orgHostArgs = "?" + orgHostArgs; @@ -1078,7 +1070,7 @@ public final class HTTPDProxyHandler { if (addr != null) if (addr != null) testHostNames.add(testHostName); } - pos = orgHostName.lastIndexOf('.'); + int pos = orgHostName.lastIndexOf('.'); if (pos != -1) { final Iterator iter = topLevelDomains.iterator(); while (iter.hasNext()) { diff --git a/source/net/yacy/server/http/HTTPDemon.java b/source/net/yacy/server/http/HTTPDemon.java index d2b606aa8..51d51b9cb 100644 --- a/source/net/yacy/server/http/HTTPDemon.java +++ b/source/net/yacy/server/http/HTTPDemon.java @@ -144,14 +144,8 @@ public final class HTTPDemon { final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); final String method = (String) conProp.get(HeaderFramework.CONNECTION_PROP_METHOD); - final int port; - final int pos = host.indexOf(':'); - if (pos != -1) { - port = NumberTools.parseIntDecSubstring(host, pos + 1); - host = host.substring(0, pos); - } else { - port = 80; - } + final int port = Domains.stripToPort(host); + host = Domains.stripToHostName(host); String urlString; try {