diff --git a/htroot/Collage.java b/htroot/Collage.java index c3c902cb1..3a41ae927 100755 --- a/htroot/Collage.java +++ b/htroot/Collage.java @@ -1,4 +1,4 @@ -// Collage.java +// Collage.java // ----------------------- // part of YaCy // (C) by Detlef Reichl; detlef!reichl()gmx!org @@ -28,18 +28,17 @@ import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; - import de.anomic.crawler.ResultImages; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class Collage { private static int fifoMax = 20; - + private static int fifoPos = -1; private static int fifoSize = 0; private static long zIndex = 0; - + private static ResultImages.OriginEntry origins[] = new ResultImages.OriginEntry[fifoMax]; private static Integer imgWidth[] = new Integer[fifoMax]; private static Integer imgHeight[] = new Integer[fifoMax]; @@ -47,7 +46,7 @@ public class Collage { private static Integer imgPosY[] = new Integer[fifoMax]; private static long imgZIndex[] = new long[fifoMax]; private static final Random rand = new Random(); - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { final serverObjects prop = new serverObjects(); final Switchboard sb = (Switchboard) env; @@ -56,7 +55,7 @@ public class Collage { int posXMax = 800; int posYMax = 500; boolean embed = false; - + if (post != null) { embed = post.containsKey("emb"); posXMax = post.getInt("width", posXMax); @@ -64,40 +63,40 @@ public class Collage { if (post.containsKey("max")) fifoMax = post.getInt("max", fifoMax); } prop.put("emb", (embed) ? "0" : "1"); - + if (nextOrigin != null) { System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true, false)); if (fifoSize == 0 || origins[fifoPos] != nextOrigin) { fifoPos = fifoPos + 1 >= fifoMax ? 0 : fifoPos + 1; fifoSize = fifoSize + 1 > fifoMax ? fifoMax : fifoSize + 1; origins[fifoPos] = nextOrigin; - + final float scale = rand.nextFloat() * 1.5f + 1; imgWidth[fifoPos] = (int) ((nextOrigin.imageEntry.width()) / scale); imgHeight[fifoPos] = (int) ((nextOrigin.imageEntry.height()) / scale); imgPosX[fifoPos] = rand.nextInt((imgWidth[fifoPos] == 0) ? posXMax / 2 : Math.max(1, posXMax - imgWidth[fifoPos])); imgPosY[fifoPos] = rand.nextInt((imgHeight[fifoPos] == 0) ? posYMax / 2 : Math.max(1, posYMax - imgHeight[fifoPos])); - + imgZIndex[fifoPos] = zIndex; zIndex += 1; } } - + if (fifoSize > 0) { - prop.put("imgurl", "1"); + prop.put("imgurl", "1"); int c = 0; final int yOffset = embed ? 0 : 70; for (int i = 0; i < fifoSize; i++) { - + final MultiProtocolURI baseURL = origins[i].baseURL; final MultiProtocolURI imageURL = origins[i].imageEntry.url(); - + // check if this loads a page from localhost, which must be prevented to protect the server // against attacks to the administration interface when localhost access is granted - if ((Domains.isLocal(baseURL.getHost()) || Domains.isLocal(imageURL.getHost())) && + if ((Domains.isLocal(baseURL.getHost(), null) || Domains.isLocal(imageURL.getHost(), null)) && sb.getConfigBool("adminAccountForLocalhost", false)) continue; - + final long z = imgZIndex[i]; prop.put("imgurl_list_" + c + "_url", "" @@ -118,7 +117,7 @@ public class Collage { } else { prop.put("imgurl", "0"); } - + prop.putNum("refresh", Math.max(2, Math.min(5, 500 / (1 + ResultImages.queueSize(!authenticated))))); prop.put("emb_privateQueueSize", ResultImages.privateQueueHighSize() + "+" + ResultImages.privateQueueLowSize()); prop.put("emb_publicQueueSize", ResultImages.publicQueueHighSize() + "+" + ResultImages.publicQueueLowSize()); diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index e8b6ecc44..b89d940ec 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -167,7 +167,7 @@ public final class search { block = true; } } - if (block && Domains.isLocal(client)) block = false; + if (block && Domains.isLocal(client, null)) block = false; if (block) { prop.put("links", ""); prop.put("linkcount", "0"); diff --git a/source/de/anomic/crawler/Latency.java b/source/de/anomic/crawler/Latency.java index c384084f8..3c6dca7da 100644 --- a/source/de/anomic/crawler/Latency.java +++ b/source/de/anomic/crawler/Latency.java @@ -39,9 +39,9 @@ public class Latency { // the map is a mapping from host names to host configurations private static final ConcurrentHashMap map = new ConcurrentHashMap(); - - public static void update(MultiProtocolURI url, long time) { - String host = url.getHost(); + + public static void update(final MultiProtocolURI url, final long time) { + final String host = url.getHost(); if (host == null) return; Host h = map.get(host); if (h == null) { @@ -52,9 +52,9 @@ public class Latency { h.update(time); } } - - public static void update(MultiProtocolURI url) { - String host = url.getHost(); + + public static void update(final MultiProtocolURI url) { + final String host = url.getHost(); if (host == null) return; Host h = map.get(host); if (h == null) { @@ -65,9 +65,9 @@ public class Latency { h.update(); } } - - public static void slowdown(MultiProtocolURI url) { - String host = url.getHost(); + + public static void slowdown(final MultiProtocolURI url) { + final String host = url.getHost(); if (host == null) return; Host h = map.get(host); if (h == null) { @@ -78,39 +78,39 @@ public class Latency { h.slowdown(); } } - - public static Host host(MultiProtocolURI url) { - String host = url.getHost(); + + public static Host host(final MultiProtocolURI url) { + final String host = url.getHost(); if (host == null) return null; return map.get(host); } - - public static int average(MultiProtocolURI url) { - String host = url.getHost(); + + public static int average(final MultiProtocolURI url) { + final String host = url.getHost(); if (host == null) return 0; - Host h = map.get(host); + final Host h = map.get(host); if (h == null) return 0; return h.average(); } - + public static Iterator> iterator() { return map.entrySet().iterator(); } - + /** * calculate the time since the last access of the domain as referenced by the url hash * @param urlhash * @return a time in milliseconds since last access of the domain or Long.MAX_VALUE if the domain was not accessed before */ - public static long lastAccessDelta(MultiProtocolURI url) { + public static long lastAccessDelta(final MultiProtocolURI url) { final Latency.Host host = Latency.host(url); if (host == null) return Long.MAX_VALUE; // never accessed return System.currentTimeMillis() - host.lastacc(); } - - + + /** * guess a minimum waiting time * the time is not correct, because if the domain was not checked yet by the robots.txt delay value, it is too low @@ -121,34 +121,34 @@ public class Latency { * @return the remaining waiting time in milliseconds. The return value may be negative * which expresses how long the time is over the minimum waiting time. */ - public static long waitingRemainingGuessed(String hostname, final long minimumLocalDelta, final long minimumGlobalDelta) { + public static long waitingRemainingGuessed(final String hostname, final long minimumLocalDelta, final long minimumGlobalDelta) { if (hostname == null) return 0; - Host host = map.get(hostname); + final Host host = map.get(hostname); if (host == null) return 0; - + // the time since last access to the domain is the basis of the remaining calculation final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc(); - + // find the minimum waiting time based on the network domain (local or global) - final boolean local = Domains.isLocal(hostname); + final boolean local = Domains.isLocal(hostname, null); long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta; - + // if we have accessed the domain many times, get slower (the flux factor) if (!local) waiting += host.flux(waiting); - + // use the access latency as rule how fast we can access the server // this applies also to localhost, but differently, because it is not necessary to // consider so many external accesses waiting = Math.max(waiting, (local) ? host.average() / 2 : host.average() * 2); - + // prevent that that a robots file can stop our indexer completely waiting = Math.min(60000, waiting); - + // return time that is remaining //System.out.println("Latency: " + (waiting - timeSinceLastAccess)); return waiting - timeSinceLastAccess; } - + /** * calculates how long should be waited until the domain can be accessed again * this follows from: @@ -161,20 +161,20 @@ public class Latency { * @param minimumGlobalDelta * @return the remaining waiting time in milliseconds */ - public static long waitingRemaining(MultiProtocolURI url, final Set thisAgents, final long minimumLocalDelta, final long minimumGlobalDelta) { + public static long waitingRemaining(final MultiProtocolURI url, final Set thisAgents, final long minimumLocalDelta, final long minimumGlobalDelta) { // first check if the domain was _ever_ accessed before - Host host = host(url); + final Host host = host(url); if (host == null) return Long.MIN_VALUE; // no delay if host is new - + // find the minimum waiting time based on the network domain (local or global) final boolean local = url.isLocal(); if (local) return minimumLocalDelta; long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta; - + // the time since last access to the domain is the basis of the remaining calculation final long timeSinceLastAccess = System.currentTimeMillis() - host.lastacc(); - + // for CGI accesses, we double the minimum time // mostly there is a database access in the background // which creates a lot of unwanted IO on target site @@ -182,52 +182,52 @@ public class Latency { // if we have accessed the domain many times, get slower (the flux factor) if (!local && host != null) waiting += host.flux(waiting); - + // find the delay as given by robots.txt on target site long robotsDelay = 0; if (!local) { RobotsTxtEntry robotsEntry; try { robotsEntry = Switchboard.getSwitchboard().robots.getEntry(url, thisAgents); - } catch (IOException e) { + } catch (final IOException e) { robotsEntry = null; } robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis(); if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return 0; // no limits if granted exclusively for this peer } waiting = Math.max(waiting, robotsDelay); - + // use the access latency as rule how fast we can access the server // this applies also to localhost, but differently, because it is not necessary to // consider so many external accesses waiting = Math.max(waiting, (local) ? host.average() / 2 : host.average() * 2); - + // prevent that that a robots file can stop our indexer completely waiting = Math.min(60000, waiting); - + // return time that is remaining //System.out.println("Latency: " + (waiting - timeSinceLastAccess)); return Math.max(0, waiting - timeSinceLastAccess); } - - - public static String waitingRemainingExplain(MultiProtocolURI url, final Set thisAgents, final long minimumLocalDelta, final long minimumGlobalDelta) { - + + + public static String waitingRemainingExplain(final MultiProtocolURI url, final Set thisAgents, final long minimumLocalDelta, final long minimumGlobalDelta) { + // first check if the domain was _ever_ accessed before - Host host = host(url); + final Host host = host(url); if (host == null) return "host " + host + " never accessed before -> 0"; // no delay if host is new - - StringBuilder s = new StringBuilder(50); - + + final StringBuilder s = new StringBuilder(50); + // find the minimum waiting time based on the network domain (local or global) final boolean local = url.isLocal(); - long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta; + final long waiting = (local) ? minimumLocalDelta : minimumGlobalDelta; s.append("minimumDelta = ").append(waiting); - + // the time since last access to the domain is the basis of the remaining calculation final long timeSinceLastAccess = (host == null) ? 0 : System.currentTimeMillis() - host.lastacc(); s.append(", timeSinceLastAccess = ").append(timeSinceLastAccess); - + // for CGI accesses, we double the minimum time // mostly there is a database access in the background // which creates a lot of unwanted IO on target site @@ -235,43 +235,43 @@ public class Latency { // if we have accessed the domain many times, get slower (the flux factor) if (!local && host != null) s.append(", flux = ").append(host.flux(waiting)); - + // find the delay as given by robots.txt on target site long robotsDelay = 0; if (!local) { RobotsTxtEntry robotsEntry; try { robotsEntry = Switchboard.getSwitchboard().robots.getEntry(url, thisAgents); - } catch (IOException e) { + } catch (final IOException e) { robotsEntry = null; } robotsDelay = (robotsEntry == null) ? 0 : robotsEntry.getCrawlDelayMillis(); if (robotsEntry != null && robotsDelay == 0 && robotsEntry.getAgentName() != null) return "no waiting for exclusive granted peer"; // no limits if granted exclusively for this peer } s.append(", robots.delay = ").append(robotsDelay); - + // use the access latency as rule how fast we can access the server // this applies also to localhost, but differently, because it is not necessary to // consider so many external accesses if (host != null) s.append(", host.average = ").append(host.average()); - + return s.toString(); } - + public static final class Host { private long timeacc; private long lastacc; private int count; private final String host; private long robotsMinDelay; - public Host(String host, long time) { + public Host(final String host, final long time) { this.host = host; this.timeacc = time; this.count = 1; this.lastacc = System.currentTimeMillis(); this.robotsMinDelay = 0; } - public void update(long time) { + public void update(final long time) { this.lastacc = System.currentTimeMillis(); this.timeacc += Math.min(30000, time); this.count++; @@ -296,15 +296,15 @@ public class Latency { public String host() { return this.host; } - public void robotsDelay(long ur) { + public void robotsDelay(final long ur) { this.robotsMinDelay = ur; } public long robotsDelay() { return this.robotsMinDelay; } - public long flux(long range) { - return count >= 1000 ? range * Math.min(5000, count) / 1000 : range / (1000 - count); + public long flux(final long range) { + return this.count >= 1000 ? range * Math.min(5000, this.count) / 1000 : range / (1000 - this.count); } } - + } diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 406be20f4..aca1ea803 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -171,7 +171,7 @@ public final class serverCore extends AbstractBusyThread implements BusyThread { final InetAddress uAddr = s.getInetAddress(); if (uAddr.isAnyLocalAddress()) return "127.0.0.1"; String cIP = uAddr.getHostAddress(); - if (Domains.isLocal(cIP)) cIP = "127.0.0.1"; + if (Domains.isLocal(cIP, null)) cIP = "127.0.0.1"; return cIP; } diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index 1847de5f4..b46387fc9 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -88,7 +88,7 @@ public class MultiProtocolURI implements Serializable, Comparable 0) ? host.substring(p + 1) : ""; final Integer i = TLDID.get(tld); @@ -875,11 +875,19 @@ public class Domains { ); } - public static boolean isLocal(final String host) { - return isLocal(host, true); + public static boolean isLocal(final String host, final InetAddress hostaddress) { + return isLocal(host, hostaddress, true); } - private static boolean isLocal(final String host, final boolean recursive) { + /** + * check if the given host is a local address. + * the hostaddress is optional and shall be given if the address is already known + * @param host + * @param hostaddress may be null if not known yet + * @param recursive + * @return true if the given host is local + */ + private static boolean isLocal(final String host, InetAddress hostaddress, final boolean recursive) { if (noLocalCheck || // DO NOT REMOVE THIS! it is correct to return true if the check is off host == null || @@ -900,8 +908,8 @@ public class Domains { // check dns lookup: may be a local address even if the domain name looks global if (!recursive) return false; - final InetAddress a = dnsResolve(host); - return isLocal(a); + if (hostaddress == null) hostaddress = dnsResolve(host); + return isLocal(hostaddress); } public static boolean isLocal(final InetAddress a) { @@ -912,7 +920,7 @@ public class Domains { a.isLinkLocalAddress() | a.isLoopbackAddress() || a.isSiteLocalAddress() || - isLocal(a.getHostAddress(), false); + isLocal(a.getHostAddress(), a, false); return localp; } diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index 1c5ec9dfe..a4713d551 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -175,7 +175,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable { assert this.hash == null; // should only be called if the hash was not computed before - final int id = Domains.getDomainID(this.host); // id=7: tld is local + final int id = Domains.getDomainID(this.host, this.hostAddress); // id=7: tld is local final boolean isHTTP = isHTTP(); int p = (this.host == null) ? -1 : this.host.lastIndexOf('.'); String dom = (p > 0) ? dom = this.host.substring(0, p) : ""; @@ -278,7 +278,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable { */ public static final String hosthash6(final String protocol, final String host, final int port) { final StringBuilder hash = new StringBuilder(12); - final int id = Domains.getDomainID(host); // id=7: tld is local + final int id = Domains.getDomainID(host, null); // id=7: tld is local int p = host.lastIndexOf('.'); String dom = (p > 0) ? dom = host.substring(0, p) : ""; p = dom.lastIndexOf('.'); diff --git a/source/net/yacy/peers/yacySeed.java b/source/net/yacy/peers/yacySeed.java index a7a25cf96..d5210b574 100644 --- a/source/net/yacy/peers/yacySeed.java +++ b/source/net/yacy/peers/yacySeed.java @@ -521,7 +521,7 @@ public class yacySeed implements Cloneable, Comparable, Comparator, Comparator, Comparator 0) return this.birthdate; long b; try { - GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes + final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes b = my_SHORT_SECOND_FORMATTER.parse(get(yacySeed.BDATE, "20040101000000")).getTime(); } catch (final ParseException e) { b = System.currentTimeMillis(); @@ -864,7 +864,7 @@ public class yacySeed implements Cloneable, Comparable, Comparator IP is null"; if (ipString.length() > 0 && ipString.length() < 8) return ipString + " -> IP is too short: "; if (Switchboard.getSwitchboard().isAllIPMode()) return null; - final boolean islocal = Domains.isLocal(ipString); + final boolean islocal = Domains.isLocal(ipString, null); //if (islocal && Switchboard.getSwitchboard().isGlobalMode()) return ipString + " - local IP for global mode rejected"; if (!islocal && Switchboard.getSwitchboard().isIntranetMode()) return ipString + " - global IP for intranet mode rejected"; return null;