Fix for index entries whose IDs are not computed as a hash of the URL.

This makes it possible to operate with externally computed URL hashes in
enterprise environments that do not use YaCy's built-in crawler.
pull/14/head
Michael Peter Christen 10 years ago
parent 2951c9fc40
commit 87f358058e

@ -357,6 +357,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
return domLengthEstimation(urlHashBytes) << 8 / 20;
}
@Deprecated
private static final int domDomain(final byte[] urlHash) {
// returns the ID of the domain of the domain
assert (urlHash != null);
@ -370,7 +371,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
@Override
public final boolean isLocal() {
// File URLs are always reported as local.
if (this.isFile()) return true;
// NOTE(review): two consecutive returns appear here; this text looks like a
// diff rendered without +/- markers. The hash-based check below (domain ID 7
// derived from the URL hash) is presumably the line being removed - confirm.
return domDomain(hash()) == 7;
// Host/address-based check delegated to Domains.isLocal; presumably the
// replacement line, which does not depend on how the URL hash was computed.
return Domains.isLocal(this.host, this.hostAddress);
}
/**
@ -378,6 +379,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable {
* @param urlhash
* @return
*/
@Deprecated
public static final boolean isLocal(final byte[] urlhash) {
// Hash-based locality test: true when domDomain(urlhash) yields domain ID 7.
// NOTE(review): presumably deprecated because URL hashes computed outside of
// YaCy need not encode the domain, making this result unreliable - confirm.
return domDomain(urlhash) == 7;
}

@ -1100,9 +1100,9 @@ public final class Protocol {
if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry.url()) ) {
if ( Network.log.isInfo() ) {
if (localsearch) {
Network.log.info("local search (solr): filtered blacklisted url " + urlEntry.url());
Network.log.info("local search (solr): filtered blacklisted url " + urlEntry.url().toNormalform(true));
} else {
Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
Network.log.info("remote search (solr): filtered blacklisted url " + urlEntry.url().toNormalform(true) + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
}
}
continue; // block with blacklist
@ -1112,9 +1112,9 @@ public final class Protocol {
if ( urlRejectReason != null ) {
if ( Network.log.isInfo() ) {
if (localsearch) {
Network.log.info("local search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ")");
Network.log.info("local search (solr): rejected url '" + urlEntry.url().toNormalform(true) + "' (" + urlRejectReason + ")");
} else {
Network.log.info("remote search (solr): rejected url '" + urlEntry.url() + "' (" + urlRejectReason + ") from peer " + target.getName());
Network.log.info("remote search (solr): rejected url '" + urlEntry.url().toNormalform(true) + "' (" + urlRejectReason + ") from peer " + target.getName());
}
}
continue; // reject url outside of our domain

Loading…
Cancel
Save