From 87f358058e527e05101837b8345937117df3911e Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 22 Sep 2015 11:56:17 +0200 Subject: [PATCH] Fix for index entries which have IDs not computed as hash from the url. This makes it possible to operate with outside-computed url hashes in enterprise environments not using the built-in crawler from YaCy. --- source/net/yacy/cora/document/id/DigestURL.java | 4 +++- source/net/yacy/peers/Protocol.java | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/source/net/yacy/cora/document/id/DigestURL.java b/source/net/yacy/cora/document/id/DigestURL.java index 4ebdd8950..40328b1c4 100644 --- a/source/net/yacy/cora/document/id/DigestURL.java +++ b/source/net/yacy/cora/document/id/DigestURL.java @@ -357,6 +357,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable { return domLengthEstimation(urlHashBytes) << 8 / 20; } + @Deprecated private static final int domDomain(final byte[] urlHash) { // returns the ID of the domain of the domain assert (urlHash != null); @@ -370,7 +371,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable { @Override public final boolean isLocal() { if (this.isFile()) return true; - return domDomain(hash()) == 7; + return Domains.isLocal(this.host, this.hostAddress); } /** @@ -378,6 +379,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable { * @param urlhash * @return */ + @Deprecated public static final boolean isLocal(final byte[] urlhash) { return domDomain(urlhash) == 7; } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 59198b53f..c5c173189 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1100,9 +1100,9 @@ public final class Protocol { if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry.url()) ) { if ( Network.log.isInfo() ) { if (localsearch) { - Network.log.info("local search (solr): filtered blacklisted 
url " + urlEntry.url()); + Network.log.info("local search (solr): filtered blacklisted url " + urlEntry.url().toNormalform(true)); } else { - Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); + Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url().toNormalform(true) + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName()))); } } continue; // block with blacklist @@ -1112,9 +1112,9 @@ public final class Protocol { if ( urlRejectReason != null ) { if ( Network.log.isInfo() ) { if (localsearch) { - Network.log.info("local search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ")"); + Network.log.info("local search (solr): rejected url '" + urlEntry.url().toNormalform(true) + "' (" + urlRejectReason + ")"); } else { - Network.log.info("remote search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ") from peer " + target.getName()); + Network.log.info("remote search (solr): rejected url '" + urlEntry.url().toNormalform(true) + "' (" + urlRejectReason + ") from peer " + target.getName()); } } continue; // reject url outside of our domain