diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index 7f2f825fb..7543596c1 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -146,7 +146,7 @@ public class AccessTracker_p { if (page == 2) { // local search prop.putNum("page_list_" + entCount + "_offset", searchProfile.offset); - prop.put("page_list_" + entCount + "_querystring", searchProfile.queryString); + prop.putHTML("page_list_" + entCount + "_querystring", searchProfile.queryString); } else { // remote search prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "" : searchProfile.remotepeer.getName()); diff --git a/htroot/Config_p.java b/htroot/Config_p.java index 9e56624e6..9f28bd036 100644 --- a/htroot/Config_p.java +++ b/htroot/Config_p.java @@ -83,7 +83,7 @@ public class Config_p { while(keys.hasNext()){ key = (String) keys.next(); prop.put("options_"+count+"_key", key); - prop.put("options_"+count+"_value", env.getConfig(key, "ERROR")); + prop.putHTML("options_"+count+"_value", env.getConfig(key, "ERROR")); count++; } diff --git a/htroot/Connections_p.java b/htroot/Connections_p.java index 34d7ce5f3..2a759a175 100644 --- a/htroot/Connections_p.java +++ b/htroot/Connections_p.java @@ -208,7 +208,7 @@ public final class Connections_p { prop.put("list_" + idx + "_ms", "1"); prop.putNum("list_" + idx + "_ms_duration", sessionTime); } - prop.put("list_" + idx + "_source",(seed!=null)?seed.getName()+".yacy":userAddress.getHostAddress()+":"+userPort); + prop.putHTML("list_" + idx + "_source",(seed!=null)?seed.getName()+".yacy":userAddress.getHostAddress()+":"+userPort); prop.put("list_" + idx + "_dest",(dest==null)?"-":dest); if (blockingRequest) { prop.put("list_" + idx + "_running", "0"); diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java index 915f06940..5cf109af9 100644 --- a/htroot/IndexCreateIndexingQueue_p.java +++ b/htroot/IndexCreateIndexingQueue_p.java @@ -138,11 +138,11 @@ public class IndexCreateIndexingQueue_p { totalSize += entrySize; initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); prop.put("indexing-queue_list_"+entryCount+"_dark", inProcess ? "2" : (dark ? "1" : "0")); - prop.put("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.putHTML("indexing-queue_list_"+entryCount+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("indexing-queue_list_"+entryCount+"_depth", pcentry.depth()); prop.put("indexing-queue_list_"+entryCount+"_modified", pcentry.getModificationDate().toString()); prop.putHTML("indexing-queue_list_"+entryCount+"_anchor", (pcentry.anchorName()==null)?"":pcentry.anchorName()); - prop.put("indexing-queue_list_"+entryCount+"_url", pcentry.url().toNormalform(false, true)); + prop.putHTML("indexing-queue_list_"+entryCount+"_url", pcentry.url().toNormalform(false, true)); prop.put("indexing-queue_list_"+entryCount+"_size", serverMemory.bytesToString(entrySize)); prop.put("indexing-queue_list_"+entryCount+"_inProcess", inProcess ? "1" :"0"); prop.put("indexing-queue_list_"+entryCount+"_inProcess_hash", pcentry.urlHash()); @@ -185,9 +185,9 @@ public class IndexCreateIndexingQueue_p { executorHash = entry.executor(); initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); executorSeed = yacyCore.seedDB.getConnected(executorHash); - prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); - prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); - prop.put("rejected_list_"+j+"_url", url.toNormalform(false, true)); + prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); + prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); + prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true)); prop.putHTML("rejected_list_"+j+"_failreason", entry.anycause()); prop.put("rejected_list_"+j+"_dark", dark ? "1" : "0"); dark = !dark; diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java index c72707229..e7dda9a0e 100644 --- a/htroot/IndexCreateLoaderQueue_p.java +++ b/htroot/IndexCreateLoaderQueue_p.java @@ -72,10 +72,10 @@ public class IndexCreateLoaderQueue_p { initiator = yacyCore.seedDB.getConnected(w[i].initiator()); prop.put("loader-set_list_"+count+"_dark", dark ? "1" : "0"); - prop.put("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.putHTML("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("loader-set_list_"+count+"_depth", w[i].depth()); prop.put("loader-set_list_"+count+"_status", w[i].getStatus()); - prop.put("loader-set_list_"+count+"_url", w[i].url().toNormalform(true, false)); + prop.putHTML("loader-set_list_"+count+"_url", w[i].url().toNormalform(true, false)); dark = !dark; count++; } diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java index dbb7bdba2..3291bd9ea 100644 --- a/htroot/IndexCreateWWWGlobalQueue_p.java +++ b/htroot/IndexCreateWWWGlobalQueue_p.java @@ -119,12 +119,12 @@ public class IndexCreateWWWGlobalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : switchboard.profilesActiveCrawls.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0"); - prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name()); - prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); + prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); dark = !dark; showNum++; diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java index 1e25e2b7f..d4776d0f7 100644 --- a/htroot/IndexCreateWWWLocalQueue_p.java +++ b/htroot/IndexCreateWWWLocalQueue_p.java @@ -184,12 +184,12 @@ public class IndexCreateWWWLocalQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : sb.profilesActiveCrawls.getEntry(profileHandle); prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0"); - prop.put("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth()); prop.put("crawler-queue_list_"+showNum+"_modified", daydate(urle.loaddate()) ); prop.putHTML("crawler-queue_list_"+showNum+"_anchor", urle.name()); - prop.put("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); + prop.putHTML("crawler-queue_list_"+showNum+"_url", urle.url().toNormalform(false, true)); prop.put("crawler-queue_list_"+showNum+"_hash", urle.url().hash()); dark = !dark; showNum++; diff --git a/htroot/IndexCreateWWWRemoteQueue_p.java b/htroot/IndexCreateWWWRemoteQueue_p.java index e7468c4de..84198c029 100644 --- a/htroot/IndexCreateWWWRemoteQueue_p.java +++ b/htroot/IndexCreateWWWRemoteQueue_p.java @@ -119,12 +119,12 @@ public class IndexCreateWWWRemoteQueue_p { profileHandle = urle.profileHandle(); profileEntry = (profileHandle == null) ? null : sb.profilesActiveCrawls.getEntry(profileHandle); prop.put("crawler-queue_list_" + showNum + "_dark", dark ? "1" : "0"); - prop.put("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.putHTML("crawler-queue_list_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName())); prop.put("crawler-queue_list_" + showNum + "_profile", ((profileEntry == null) ? "unknown" : profileEntry.name())); prop.put("crawler-queue_list_" + showNum + "_depth", urle.depth()); prop.put("crawler-queue_list_" + showNum + "_modified", daydate(urle.loaddate()) ); prop.putHTML("crawler-queue_list_" + showNum + "_anchor", urle.name()); - prop.put("crawler-queue_list_" + showNum + "_url", urle.url().toString()); + prop.putHTML("crawler-queue_list_" + showNum + "_url", urle.url().toString()); prop.put("crawler-queue_list_" + showNum + "_hash", urle.url().hash()); dark = !dark; showNum++; diff --git a/htroot/rct_p.java b/htroot/rct_p.java index e1f73bf70..492002650 100644 --- a/htroot/rct_p.java +++ b/htroot/rct_p.java @@ -73,7 +73,8 @@ public class rct_p { loaddate = new Date(); } yacyURL referrer = null; // referrer needed! - if (sb.acceptURL(url)) { + String urlRejectReason = sb.acceptURL(url); + if (urlRejectReason == null) { // stack url sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'"); String reasonString = sb.crawlStacker.stackCrawl(url, referrer, peerhash, "REMOTE-CRAWLING", loaddate, 0, sb.defaultRemoteProfile); @@ -88,7 +89,7 @@ public class rct_p { env.getLog().logInfo("crawlOrder: ignored [" + reasonString + "] remote crawl url: " + url.toNormalform(true, false)); } } else { - env.getLog().logWarning("crawlOrder: Received URL outside of our domain: " + url.toNormalform(true, false)); + env.getLog().logWarning("crawlOrder: Rejected URL '" + url.toNormalform(true, false) + "': " + urlRejectReason); } } } diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index 68fa2a15e..22eb1043c 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -147,8 +147,9 @@ public final class crawlReceipt { } // check if the entry is in our network domain - if (!switchboard.acceptURL(comp.url())) { - log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url outside of our domain) for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr); + String urlRejectReason = switchboard.acceptURL(comp.url()); + if (urlRejectReason != null) { + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + entry.hash() + " from peer " + iam + "\n\tURL properties: "+ propStr); prop.put("delay", "9999"); return prop; } diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index c22d456b9..8518aaa02 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -145,8 +145,9 @@ public final class transferURL { } // check if the entry is in our network domain - if (!sb.acceptURL(comp.url())) { - yacyCore.log.logFine("transferURL: blocked URL outside of our domain '" + comp.url().toNormalform(false, true) + "' from peer " + otherPeerName); + String urlRejectReason = sb.acceptURL(comp.url()); + if (urlRejectReason != null) { + yacyCore.log.logFine("transferURL: blocked URL '" + comp.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName); lEntry = null; blocked++; continue; diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java index a26e6a01a..4fb1f8554 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java @@ -321,7 +321,8 @@ public class plasmaCrawlQueues { } catch (ParseException e) { loaddate = new Date(); } - if (sb.acceptURL(url)) { + String urlRejectReason = sb.acceptURL(url); + if (urlRejectReason == null) { // stack url sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'"); String reasonString = sb.crawlStacker.stackCrawl(url, referrer, hash, item.getDescription(), loaddate, 0, sb.defaultRemoteProfile); @@ -336,7 +337,7 @@ public class plasmaCrawlQueues { log.logInfo("crawlOrder: ignored [" + reasonString + "] remote crawl url: " + url.toNormalform(true, false)); } } else { - log.logWarning("crawlOrder: Received URL outside of our domain: " + url.toNormalform(true, false)); + log.logWarning("crawlOrder: Rejected URL '" + url.toNormalform(true, false) + "': " + urlRejectReason); } } return true; diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index e9763e65a..f9c7f4740 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -36,7 +36,6 @@ public class plasmaCrawlEURL { public static final String DENIED_URL_NULL = "denied_(url_null)"; public static final String DENIED_MALFORMED_URL = "denied_(malformed_url)"; public static final String DENIED_UNSUPPORTED_PROTOCOL = "denied_(unsupported_protocol)"; - public static final String DENIED_IP_ADDRESS_NOT_IN_DECLARED_DOMAIN = "denied_(address_not_in_declared_domain)"; public static final String DENIED_LOOPBACK_IP_ADDRESS = "denied_(loopback_ip_address)"; public static final String DENIED_CACHEFILE_PATH_TOO_LONG = "denied_(cachefile_path_too_long)"; public static final String DENIED_INVALID_CACHEFILE_PATH = "denied_(invalid_cachefile_path)"; diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 3e7764843..a5ef75fef 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -385,10 +385,10 @@ public final class plasmaCrawlStacker extends Thread { } // check if ip is local ip address - if (!sb.acceptURL(entry.url())) { - reason = plasmaCrawlEURL.DENIED_IP_ADDRESS_NOT_IN_DECLARED_DOMAIN + "[" + sb.getConfig("network.unit.domain", "unknown") + "]"; - if (this.log.isFine()) this.log.logFine("Host in URL '" + entry.url().toString() + "' has IP address outside of declared range (" + sb.getConfig("network.unit.domain", "unknown") + "). " + - "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); + String urlRejectReason = sb.acceptURL(entry.url()); + if (urlRejectReason != null) { + reason = "denied_(" + urlRejectReason + ")_domain=" + sb.getConfig("network.unit.domain", "unknown"); + if (this.log.isFine()) this.log.logFine(reason + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); return reason; } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 97513eaad..019a55655 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1427,21 +1427,32 @@ public final class plasmaSwitchboard extends serverAbstractSwitch