- fix for localhost detection

- added IPv6 patterns for localhost detection
pull/1/head
Michael Peter Christen 12 years ago
parent 34f8786508
commit 7c3de8b4cd

@ -73,7 +73,8 @@ public class Domains {
private static Method InetAddressLocatorGetLocaleInetAddressMethod; private static Method InetAddressLocatorGetLocaleInetAddressMethod;
private static final Set<String> ccSLD_TLD = new HashSet<String>(); private static final Set<String> ccSLD_TLD = new HashSet<String>();
private static final String PRESENT = ""; private static final String PRESENT = "";
private static final String LOCAL_PATTERNS = "10\\..*,127\\..*,172\\.(1[6-9]|2[0-9]|3[0-1])\\..*,169\\.254\\..*,192\\.168\\..*,localhost"; private static final Pattern LOCAL_PATTERNS = Pattern.compile("(10\\..*)|(127\\..*)|(172\\.(1[6-9]|2[0-9]|3[0-1])\\..*)|(169\\.254\\..*)|(192\\.168\\..*)|(localhost)|(\\[?\\:\\:1/.*)|(\\[?fc.*)|(\\[?fd.*)|(\\[?(fe80|0)\\:0\\:0\\:0\\:0\\:0\\:0\\:1.*)");
private static final int MAX_NAME_CACHE_HIT_SIZE = 100000; private static final int MAX_NAME_CACHE_HIT_SIZE = 100000;
private static final int MAX_NAME_CACHE_MISS_SIZE = 100000; private static final int MAX_NAME_CACHE_MISS_SIZE = 100000;
private static final int CONCURRENCY_LEVEL = Runtime.getRuntime().availableProcessors() + 1; private static final int CONCURRENCY_LEVEL = Runtime.getRuntime().availableProcessors() + 1;
@ -83,7 +84,6 @@ public class Domains {
private static final ARC<String, String> NAME_CACHE_MISS = new ConcurrentARC<String, String>(MAX_NAME_CACHE_MISS_SIZE, CONCURRENCY_LEVEL); private static final ARC<String, String> NAME_CACHE_MISS = new ConcurrentARC<String, String>(MAX_NAME_CACHE_MISS_SIZE, CONCURRENCY_LEVEL);
private static final ConcurrentHashMap<String, Object> LOOKUP_SYNC = new ConcurrentHashMap<String, Object>(100, 0.75f, Runtime.getRuntime().availableProcessors() * 2); private static final ConcurrentHashMap<String, Object> LOOKUP_SYNC = new ConcurrentHashMap<String, Object>(100, 0.75f, Runtime.getRuntime().availableProcessors() * 2);
private static List<Pattern> nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList<Pattern>()); private static List<Pattern> nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList<Pattern>());
private static final List<Pattern> INTRANET_PATTERNS = makePatterns(LOCAL_PATTERNS);
public static long cacheHit_Hit = 0, cacheHit_Miss = 0, cacheHit_Insert = 0; // for statistics only; do not write public static long cacheHit_Hit = 0, cacheHit_Miss = 0, cacheHit_Insert = 0; // for statistics only; do not write
public static long cacheMiss_Hit = 0, cacheMiss_Miss = 0, cacheMiss_Insert = 0; // for statistics only; do not write public static long cacheMiss_Hit = 0, cacheMiss_Miss = 0, cacheMiss_Insert = 0; // for statistics only; do not write
@ -832,9 +832,9 @@ public class Domains {
if (ip == null || ip.length() < 8) return null; if (ip == null || ip.length() < 8) return null;
ip = ip.trim(); ip = ip.trim();
if (ip.charAt(0) == '[' && ip.charAt(ip.length() - 1) == ']') ip = ip.substring(1, ip.length() - 1); if (ip.charAt(0) == '[' && ip.charAt(ip.length() - 1) == ']') ip = ip.substring(1, ip.length() - 1);
if (isLocalhost(ip)) ip = "127.0.0.1"; // normalize to IPv4 here since that is the way to calculate the InetAddress if ("localhost".equals(ip)) ip = "127.0.0.1"; // normalize to IPv4 here since that is the way to calculate the InetAddress
final String[] ips = CommonPattern.DOT.split(ip); final String[] ips = CommonPattern.DOT.split(ip);
if (ips.length != 4) return null; if (ips.length != 4) return null; // TODO: parse IPv6 addresses
final byte[] ipb = new byte[4]; final byte[] ipb = new byte[4];
try { try {
ipb[0] = (byte) Integer.parseInt(ips[0]); ipb[0] = (byte) Integer.parseInt(ips[0]);
@ -989,8 +989,7 @@ public class Domains {
final Set<InetAddress> list = new HashSet<InetAddress>(); final Set<InetAddress> list = new HashSet<InetAddress>();
if (localHostAddresses.isEmpty()) return list; // give up if (localHostAddresses.isEmpty()) return list; // give up
for (final InetAddress a: localHostAddresses) { for (final InetAddress a: localHostAddresses) {
if (((0Xff & a.getAddress()[0]) == 127) || if ((0Xff & a.getAddress()[0]) == 127 || LOCAL_PATTERNS.matcher(a.getHostAddress()).matches()) continue;
(!matchesList(a.getHostAddress(), INTRANET_PATTERNS))) continue;
list.add(a); list.add(a);
} }
return list; return list;
@ -1051,13 +1050,7 @@ public class Domains {
*/ */
public static boolean isLocalhost(final String host) { public static boolean isLocalhost(final String host) {
return (noLocalCheck || // DO NOT REMOVE THIS! it is correct to return true if the check is off return (noLocalCheck || // DO NOT REMOVE THIS! it is correct to return true if the check is off
"127.0.0.1".equals(host) || (host != null && LOCAL_PATTERNS.matcher(host).matches()));
"localhost".equals(host) ||
host.startsWith("0:0:0:0:0:0:0:1") || host.startsWith("[0:0:0:0:0:0:0:1]") ||
host.startsWith("fe80:0:0:0:0:0:0:1") || host.startsWith("[fe80:0:0:0:0:0:0:1]") || // used by my mac as localhost
host.startsWith("::1/") || host.startsWith("[::1/") ||
"::1".equals(host) || "[::1]".equals(host)
);
} }
/** /**
@ -1077,10 +1070,12 @@ public class Domains {
host == null || host == null ||
host.isEmpty()) return true; host.isEmpty()) return true;
// FIXME IPv4 only
// check local ip addresses // check local ip addresses
if (matchesList(host, INTRANET_PATTERNS)) return true;
if (isLocalhost(host)) return true; if (isLocalhost(host)) return true;
if (hostaddress != null && (
isLocalhost(hostaddress.getHostAddress()) ||
isLocal(hostaddress)
)) return true;
// check if there are other local IP addresses that are not in // check if there are other local IP addresses that are not in
// the standard IP range // the standard IP range
@ -1107,10 +1102,9 @@ public class Domains {
localp = noLocalCheck || // DO NOT REMOVE THIS! it is correct to return true if the check is off localp = noLocalCheck || // DO NOT REMOVE THIS! it is correct to return true if the check is off
a == null || a == null ||
a.isAnyLocalAddress() || a.isAnyLocalAddress() ||
a.isLinkLocalAddress() | a.isLinkLocalAddress() ||
a.isLoopbackAddress() || a.isLoopbackAddress() ||
a.isSiteLocalAddress() || a.isSiteLocalAddress();
isLocal(a.getHostAddress(), a, false);
return localp; return localp;
} }
@ -1148,7 +1142,7 @@ public class Domains {
public static Locale getLocale(final InetAddress address) { public static Locale getLocale(final InetAddress address) {
if (InetAddressLocatorGetLocaleInetAddressMethod == null) return null; if (InetAddressLocatorGetLocaleInetAddressMethod == null) return null;
if (address == null) return null; if (address == null) return null;
if (isLocal(address)) return null; if (isLocal(address.getHostAddress(), address, false)) return null;
try { try {
return (Locale) InetAddressLocatorGetLocaleInetAddressMethod.invoke(null, new Object[]{address}); return (Locale) InetAddressLocatorGetLocaleInetAddressMethod.invoke(null, new Object[]{address});
} catch (final IllegalArgumentException e) { } catch (final IllegalArgumentException e) {

@ -66,10 +66,10 @@ public class CrawlQueues {
private static final String ERROR_DB_FILENAME = "urlError4.db"; private static final String ERROR_DB_FILENAME = "urlError4.db";
private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db"; private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
protected Switchboard sb; private Switchboard sb;
protected Log log; private Log log;
protected Map<Integer, Loader> workers; // mapping from url hash to Worker thread object private Map<Integer, Loader> workers; // mapping from url hash to Worker thread object
private final ArrayList<String> remoteCrawlProviderHashes; private final ArrayList<String> remoteCrawlProviderHashes;
public NoticedURL noticeURL; public NoticedURL noticeURL;
public ZURL errorURL, delegatedURL; public ZURL errorURL, delegatedURL;
@ -119,10 +119,8 @@ public class CrawlQueues {
public void clear() { public void clear() {
// wait for all workers to finish // wait for all workers to finish
for (final Loader w: this.workers.values()) { for (final Loader w: this.workers.values()) w.interrupt();
w.interrupt(); for (final Loader w: this.workers.values()) try {w.join(10);} catch (InterruptedException e1) {}
}
// TODO: wait some more time until all threads are finished
this.workers.clear(); this.workers.clear();
this.remoteCrawlProviderHashes.clear(); this.remoteCrawlProviderHashes.clear();
this.noticeURL.clear(); this.noticeURL.clear();
@ -192,7 +190,7 @@ public class CrawlQueues {
return null; return null;
} }
public void cleanup() { private void cleanup() {
// wait for all workers to finish // wait for all workers to finish
final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000); final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
for (final Loader w: this.workers.values()) { for (final Loader w: this.workers.values()) {
@ -613,13 +611,13 @@ public class CrawlQueues {
return this.workers.size(); return this.workers.size();
} }
protected final class Loader extends Thread { private final class Loader extends Thread {
protected Request request; private Request request;
private final Integer code; private final Integer code;
private final long start; private final long start;
public Loader(final Request entry) { private Loader(final Request entry) {
this.start = System.currentTimeMillis(); this.start = System.currentTimeMillis();
this.request = entry; this.request = entry;
this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED); this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
@ -627,7 +625,7 @@ public class CrawlQueues {
this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
} }
public long age() { private long age() {
return System.currentTimeMillis() - this.start; return System.currentTimeMillis() - this.start;
} }
@ -702,11 +700,9 @@ public class CrawlQueues {
FailCategory.TEMPORARY_NETWORK_FAILURE, FailCategory.TEMPORARY_NETWORK_FAILURE,
e.getMessage() + " - in worker", -1); e.getMessage() + " - in worker", -1);
Log.logException(e); Log.logException(e);
// Client.initConnectionManager();
this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED); this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED);
} finally { } finally {
final Loader w = CrawlQueues.this.workers.remove(this.code); CrawlQueues.this.workers.remove(this.code);
assert w != null;
} }
} }
} }

Loading…
Cancel
Save