From c6c61be3f0814e3e777ef485575e39b29ce5e081 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 24 Feb 2012 00:38:57 +0100 Subject: [PATCH] fix for http://bugs.yacy.net/view.php?id=148 --- htroot/CrawlResults.java | 2 +- source/de/anomic/crawler/CrawlStacker.java | 6 ++++-- source/net/yacy/cora/document/MultiProtocolURI.java | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 519f8cd0a..e2edb10c0 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -253,7 +253,7 @@ public class CrawlResults { if (showIP && urle != null) { prop.put("table_indexed_" + cnt + "_showIP", "1"); - prop.put("table_indexed_" + cnt + "_showIP_ip", urle.url().getInetAddress().getHostAddress()); + prop.put("table_indexed_" + cnt + "_showIP_ip", urle.url().getHost() == null ? "" : urle.url().getInetAddress().getHostAddress()); } else prop.put("table_indexed_" + cnt + "_showIP", "0"); diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 9a0c6c557..57aa22143 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -186,6 +186,7 @@ public final class CrawlStacker { } public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final Map hyperlinks, final boolean replace) { new Thread() { + @Override public void run() { enqueueEntries(initiator, profileHandle, hyperlinks, true); } @@ -240,6 +241,7 @@ public final class CrawlStacker { public void enqueueEntriesFTP(final byte[] initiator, final String profileHandle, final String host, final int port, final boolean replace) { final CrawlQueues cq = this.nextQueue; new Thread() { + @Override public void run() { BlockingQueue queue; try { @@ -487,13 +489,13 @@ public final class CrawlStacker { // this is expensive and those filters are check at the end of all other tests // filter with must-match for IPs - if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_ALL_PATTERN && !profile.ipMustMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { + if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_ALL_PATTERN && url.getHost() != null && !profile.ipMustMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { if (this.log.isFine()) this.log.logFine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'."); return "ip " + url.getInetAddress().getHostAddress() + " of url does not match must-match filter"; } // filter with must-not-match for IPs - if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.ipMustNotMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { + if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && url.getHost() != null && profile.ipMustNotMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) { if (this.log.isFine()) this.log.logFine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'."); return "ip " + url.getInetAddress().getHostAddress() + " of url matches must-not-match filter"; } diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index f73a1d254..d8ce9b393 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -702,6 +702,7 @@ public class MultiProtocolURI implements Serializable, Comparable