From fb766413d15ef5157ba030fd120b29c1502356f3 Mon Sep 17 00:00:00 2001 From: theli Date: Mon, 7 Nov 2005 10:57:54 +0000 Subject: [PATCH] *) Changes on httpc dns caching - Bugfix: old dns cache did not handle case insensitive hostnames correctly. - adding a possibility to set domain name patterns defining hostnames that should not be cached by the httpc dns cache e.g. borg-300.dyndns.org This can be done by setting the new httpc.nameCacheNoCachingPatterns property - using httpc.dnsResolve wherever possible within the sourcecode [httpd.java,plasmaCrawlStacker.java] git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1044 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/hello.java | 25 ++--- source/de/anomic/http/httpc.java | 96 ++++++++++++------- source/de/anomic/http/httpd.java | 37 +++---- .../de/anomic/plasma/plasmaCrawlStacker.java | 30 +++--- source/de/anomic/plasma/plasmaHTCache.java | 9 +- .../de/anomic/plasma/plasmaSwitchboard.java | 11 +++ .../de/anomic/server/logging/serverLog.java | 2 +- yacy.init | 3 + 8 files changed, 129 insertions(+), 84 deletions(-) diff --git a/htroot/yacy/hello.java b/htroot/yacy/hello.java index 6cb611a17..46e87db0c 100644 --- a/htroot/yacy/hello.java +++ b/htroot/yacy/hello.java @@ -50,6 +50,7 @@ import java.net.InetAddress; import java.util.Date; import de.anomic.http.httpHeader; +import de.anomic.http.httpc; import de.anomic.server.serverCore; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; @@ -108,17 +109,19 @@ public final class hello { boolean isLocalIP = false; if (serverCore.portForwardingEnabled) { try { - final InetAddress clientAddress = InetAddress.getByName(clientip); - if (clientAddress.isAnyLocalAddress() || clientAddress.isLoopbackAddress()) { - isLocalIP = true; - } else { - final InetAddress[] localAddress = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); - for (i = 0; i < localAddress.length; i++) { - if (localAddress[i].equals(clientAddress)) { - isLocalIP = true; - break; - } - } + final InetAddress clientAddress = httpc.dnsResolve(clientip); + if (clientAddress != null) { + if (clientAddress.isAnyLocalAddress() || clientAddress.isLoopbackAddress()) { + isLocalIP = true; + } else { + final InetAddress[] localAddress = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); + for (i = 0; i < localAddress.length; i++) { + if (localAddress[i].equals(clientAddress)) { + isLocalIP = true; + break; + } + } + } } } catch (Exception e) {} } diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index 5119052f7..0218e0032 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -46,6 +46,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PushbackInputStream; +import java.net.Inet4Address; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.MalformedURLException; @@ -59,7 +60,9 @@ import java.util.Date; import java.util.Enumeration; import java.util.GregorianCalendar; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.Locale; import java.util.TimeZone; import java.util.zip.GZIPInputStream; @@ -118,6 +121,8 @@ public final class httpc { // the dns cache private static final HashMap nameCacheHit = new HashMap(); + public static final LinkedList nameCacheNoCachingPatterns = new LinkedList(); + public static final HashSet nameCacheNoCachingList = new HashSet(); //private static HashSet nameCacheMiss = new HashSet(); /** @@ -386,45 +391,66 @@ public final class httpc { * @param host Hostname of the host in demand. * @return String with the ip. null, if the host could not be resolved. */ - public static String dnsResolve(String host) { - String ip = (String) nameCacheHit.get(host); + public static InetAddress dnsResolve(String host) { + if ((host == null)||(host.length() == 0)) return null; + host = host.toLowerCase().trim(); + + // trying to resolve host by doing a name cache lookup + InetAddress ip = (InetAddress) nameCacheHit.get(host); if (ip != null) return ip; + // if (nameCacheMiss.contains(host)) return null; - try { - ip = InetAddress.getByName(host).getHostAddress(); - if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { - if (host.indexOf("dyndns") < 0) nameCacheHit.put(host, ip); - return ip; + try { + boolean doCaching = true; + ip = InetAddress.getByName(host); + if ( + (ip == null) || + (ip.isLoopbackAddress()) || + (nameCacheNoCachingList.contains(ip.getHostName())) + ) { + doCaching = false; + } else { + Iterator noCachingPatternIter = nameCacheNoCachingPatterns.iterator(); + while (noCachingPatternIter.hasNext()) { + String nextPattern = (String) noCachingPatternIter.next(); + if (ip.getHostName().matches(nextPattern)) { + // disallow dns caching for this host + nameCacheNoCachingList.add(ip.getHostName()); + doCaching = false; + break; + } + } } - return null; + + if (doCaching) nameCacheHit.put(ip.getHostName(), ip); + return ip; } catch (UnknownHostException e) { //nameCacheMiss.add(host); } return null; } - /** - * Checks wether an hostname already is in the DNS-cache. - * FIXME: This method should use dnsResolve, as the code is 90% identical? - * - * @param host Searched for hostname. - * @return true, if the hostname already is in the cache. - */ - public static boolean dnsFetch(String host) { - if ((nameCacheHit.get(host) != null) /*|| (nameCacheMiss.contains(host)) */) return false; - if (host.indexOf("dyndns") < 0) return false; - try { - String ip = InetAddress.getByName(host).getHostAddress(); - if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { - nameCacheHit.put(host, ip); - return true; - } - return false; - } catch (UnknownHostException e) { - //nameCacheMiss.add(host); - return false; - } - } +// /** +// * Checks wether an hostname already is in the DNS-cache. +// * FIXME: This method should use dnsResolve, as the code is 90% identical? +// * +// * @param host Searched for hostname. +// * @return true, if the hostname already is in the cache. +// */ +// public static boolean dnsFetch(String host) { +// if ((nameCacheHit.get(host) != null) /*|| (nameCacheMiss.contains(host)) */) return false; +// try { +// String ip = InetAddress.getByName(host).getHostAddress(); +// if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { +// nameCacheHit.put(host, ip); +// return true; +// } +// return false; +// } catch (UnknownHostException e) { +// //nameCacheMiss.add(host); +// return false; +// } +// } /** * Returns the given date in an HTTP-usable format. @@ -527,13 +553,13 @@ public final class httpc { } this.host = server + ((port == 80) ? "" : (":" + port)); - String hostip; - if ((server.equals("localhost")) || (server.equals("127.0.0.1")) || (server.startsWith("192.168.")) || (server.startsWith("10."))) { - hostip = server; - } else { + InetAddress hostip; +// if ((server.equals("localhost")) || (server.equals("127.0.0.1")) || (server.startsWith("192.168.")) || (server.startsWith("10."))) { +// hostip = server; +// } else { hostip = dnsResolve(server); if (hostip == null) throw new UnknownHostException(server); - } +// } // creating a socket this.socket = (ssl) diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index 1d161bb0e..3c3f76057 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -1062,22 +1062,19 @@ public final class httpd implements serverHandler { String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1"); // check if ip is local ip address - try { - InetAddress hostAddress = InetAddress.getByName(clientIP); - if (hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress()) { - tp.put("host", serverCore.publicLocalIP().getHostAddress()); - tp.put("port", switchboard.getConfig("port", "8080")); - } else { - tp.put("host", serverCore.publicIP()); - tp.put("port", (serverCore.portForwardingEnabled && (serverCore.portForwarding != null)) - ? Integer.toString(serverCore.portForwarding.getPort()) - : switchboard.getConfig("port", "8080")); - } - } catch (UnknownHostException e) { + InetAddress hostAddress = httpc.dnsResolve(clientIP); + if (hostAddress == null) { + tp.put("host", serverCore.publicLocalIP().getHostAddress()); + tp.put("port", switchboard.getConfig("port", "8080")); + } else if (hostAddress.isSiteLocalAddress() || hostAddress.isLoopbackAddress()) { tp.put("host", serverCore.publicLocalIP().getHostAddress()); tp.put("port", switchboard.getConfig("port", "8080")); - } - + } else { + tp.put("host", serverCore.publicIP()); + tp.put("port", (serverCore.portForwardingEnabled && (serverCore.portForwarding != null)) + ? Integer.toString(serverCore.portForwarding.getPort()) + : switchboard.getConfig("port", "8080")); + } tp.put("peerName", yacyCore.seedDB.mySeed.getName()); tp.put("errorMessageType", errorcase); @@ -1318,9 +1315,12 @@ public final class httpd implements serverHandler { boolean isThisHostIP = false; try { - InetAddress hostAddress = InetAddress.getByName(hostName); - InetAddress forwardingAddress = InetAddress.getByName(serverCore.portForwarding.getHost()); + //InetAddress hostAddress = InetAddress.getByName(hostName); + InetAddress hostAddress = httpc.dnsResolve(hostName); + //InetAddress forwardingAddress = InetAddress.getByName(serverCore.portForwarding.getHost()); + InetAddress forwardingAddress = httpc.dnsResolve(serverCore.portForwarding.getHost()); + if ((hostAddress==null)||(forwardingAddress==null)) return false; if (hostAddress.equals(forwardingAddress)) return true; } catch (Exception e) {} return isThisHostIP; @@ -1331,7 +1331,10 @@ public final class httpd implements serverHandler { boolean isThisHostIP = false; try { - final InetAddress clientAddress = InetAddress.getByName(hostName); +// final InetAddress clientAddress = InetAddress.getByName(hostName); + final InetAddress clientAddress = httpc.dnsResolve(hostName); + if (clientAddress == null) return false; + if (clientAddress.isAnyLocalAddress() || clientAddress.isLoopbackAddress()) return true; final InetAddress[] localAddress = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index a7c23a857..338bc78b8 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -206,24 +206,22 @@ public final class plasmaCrawlStacker { } // check if ip is local ip address - try { - InetAddress hostAddress = InetAddress.getByName(nexturl.getHost()); - if (hostAddress.isSiteLocalAddress()) { - reason = "denied_(private_ip_address)"; - this.log.logFine("Host in URL '" + nexturlString + "' has private ip address." + - "Stack processing time: " + (System.currentTimeMillis()-startTime)); - return reason; - } else if (hostAddress.isLoopbackAddress()) { - reason = "denied_(loopback_ip_address)"; - this.log.logFine("Host in URL '" + nexturlString + "' has loopback ip address." + - "Stack processing time: " + (System.currentTimeMillis()-startTime)); - return reason; - } - } catch (UnknownHostException e) { + InetAddress hostAddress = httpc.dnsResolve(nexturl.getHost()); + if (hostAddress == null) { reason = "denied_(unknown_host)"; this.log.logFine("Unknown host in URL '" + nexturlString + "'." + - "Stack processing time: " + (System.currentTimeMillis()-startTime)); - return reason; + "Stack processing time: " + (System.currentTimeMillis()-startTime)); + return reason; + } else if (hostAddress.isSiteLocalAddress()) { + reason = "denied_(private_ip_address)"; + this.log.logFine("Host in URL '" + nexturlString + "' has private ip address." + + "Stack processing time: " + (System.currentTimeMillis()-startTime)); + return reason; + } else if (hostAddress.isLoopbackAddress()) { + reason = "denied_(loopback_ip_address)"; + this.log.logFine("Host in URL '" + nexturlString + "' has loopback ip address." + + "Stack processing time: " + (System.currentTimeMillis()-startTime)); + return reason; } // check blacklist diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index f8d1cf157..4563c9cf2 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -57,6 +57,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.HashSet; import java.io.IOException; +import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; import java.util.Date; @@ -336,14 +337,14 @@ public final class plasmaHTCache { // start to prefetch ip's from dns String dom; long start = System.currentTimeMillis(); - String ip, result = ""; + String result = ""; c = 0; while ((doms.size() > 0) && (c < 50) && ((System.currentTimeMillis() - start) < 60000)) { dom = (String) doms.getMaxObject(); - ip = httpc.dnsResolve(dom); + InetAddress ip = httpc.dnsResolve(dom); if (ip == null) continue; - result += ", " + dom + "=" + ip; - this.log.logConfig("PRE-FILLED " + dom + "=" + ip); + result += ", " + dom + "=" + ip.getHostAddress(); + this.log.logConfig("PRE-FILLED " + dom + "=" + ip.getHostAddress()); c++; doms.deleteScore(dom); // wait a short while to prevent that this looks like a DoS diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 656400a16..79bdcae36 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -449,9 +449,20 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser testresult = facilityDB.selectLong("statistik", (new serverDate()).toShortString(false).substring(0, 11)); */ + /* + * Initializing httpc + */ // initializing yacyDebugMode httpc.yacyDebugMode = getConfig("yacyDebugMode", "false").equals("true"); + // init nameCacheNoCachingList + String noCachingList = getConfig("httpc.nameCacheNoCachingPatterns",""); + String[] noCachingEntries = noCachingList.split(","); + for (int i=0; i