diff --git a/build.properties b/build.properties index 1cdd0e78a..6c7c0960f 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.5 javacTarget=1.5 # Release Configuration -releaseVersion=0.582 +releaseVersion=0.583 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz diff --git a/source/de/anomic/crawler/ProtocolLoader.java b/source/de/anomic/crawler/ProtocolLoader.java index 66a5963ea..753287154 100644 --- a/source/de/anomic/crawler/ProtocolLoader.java +++ b/source/de/anomic/crawler/ProtocolLoader.java @@ -28,6 +28,9 @@ package de.anomic.crawler; import java.util.Arrays; import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; @@ -35,6 +38,9 @@ import de.anomic.server.logging.serverLog; public final class ProtocolLoader { + private static final long minDelay = 250; // milliseconds; 4 accesses per second + private static final ConcurrentHashMap accessTime = new ConcurrentHashMap(); // to protect targets from DDoS + private plasmaSwitchboard sb; private serverLog log; private HashSet supportedProtocols; @@ -64,7 +70,30 @@ public final class ProtocolLoader { public plasmaHTCache.Entry load(CrawlEntry entry, String parserMode) { // getting the protocol of the next URL String protocol = entry.url().getProtocol(); + String host = entry.url().getHost(); + + // check access time + if (!entry.url().isLocal()) { + Long lastAccess = accessTime.get(host); + long wait = 0; + if (lastAccess != null) wait = Math.max(0, minDelay + lastAccess.longValue() - System.currentTimeMillis()); + if (wait > 0) { + // force a sleep here. Instead just sleep we clean up the accessTime map + long untilTime = System.currentTimeMillis() + wait; + Iterator> i = accessTime.entrySet().iterator(); + Map.Entry e; + while (i.hasNext()) { + e = i.next(); + if (System.currentTimeMillis() > untilTime) break; + if (System.currentTimeMillis() - e.getValue().longValue() > minDelay) i.remove(); + } + if (System.currentTimeMillis() < untilTime) + try {Thread.sleep(untilTime - System.currentTimeMillis());} catch (InterruptedException ee) {} + } + } + accessTime.put(host, System.currentTimeMillis()); + // load resource if ((protocol.equals("http") || (protocol.equals("https")))) return httpLoader.load(entry, parserMode); if (protocol.equals("ftp")) return ftpLoader.load(entry); diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index c21e5f28b..bfb08c767 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -147,9 +147,8 @@ public final class plasmaSearchQuery { this.maxDistance = maxDistance; this.prefer = prefer; this.contentdom = contentdom; - this.linesPerPage = lines; - this.offset = offset; - //this.maximumTime = Math.min(6000, maximumTime); + this.linesPerPage = Math.min(100, lines); + this.offset = Math.min(100, offset); this.urlMask = urlMask; this.domType = domType; this.zonecode = domainzone; diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index a0b2ba6a9..980417dd6 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -56,6 +56,7 @@ public final class plasmaSearchRankingProcess { public static kelondroBinSearch[] ybrTables = null; // block-rank tables public static final int maxYBR = 3; // the lower this value, the faster the search private static boolean useYBR = true; + private static final int maxDoubleDom = 20; private kelondroSortStack stack; private HashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack @@ -259,7 +260,7 @@ public final class plasmaSearchRankingProcess { m = this.doubleDomCache.get(domhash); if (m == null) { // first appearance of dom - m = new kelondroSortStack(-1); + m = new kelondroSortStack(maxDoubleDom); this.doubleDomCache.put(domhash, m); return rwi; } diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index f29e63f78..b1cbcf3d2 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -196,7 +196,7 @@ public final class serverCore extends serverAbstractBusyThread implements server InetAddress uAddr = s.getInetAddress(); if (uAddr.isAnyLocalAddress()) return "localhost"; String cIP = uAddr.getHostAddress(); - if (cIP.equals("0:0:0:0:0:0:0:1")) cIP = "localhost"; + if (cIP.startsWith("0:0:0:0:0:0:0:1")) cIP = "localhost"; if (cIP.equals("127.0.0.1")) cIP = "localhost"; return cIP; } diff --git a/source/de/anomic/yacy/yacyURL.java b/source/de/anomic/yacy/yacyURL.java index 69e97deb0..79dbd21b5 100644 --- a/source/de/anomic/yacy/yacyURL.java +++ b/source/de/anomic/yacy/yacyURL.java @@ -756,7 +756,12 @@ public class yacyURL implements Serializable { // checks for local/global IP range and local IP public boolean isLocal() { - if (this.hash == null) synchronized (this) {this.hash = urlHashComputation();} + if (this.hash == null) { + if (this.host.startsWith("0:0:0:0:0:0:0:1") || this.host.equals("127.0.0.1")) return true; + synchronized (this) { + this.hash = urlHashComputation(); + } + } return domDomain(this.hash) == 7; }