From a3cd3852ab4be20f89d861bf703945e619102705 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 7 Dec 2012 15:49:23 +0100 Subject: [PATCH] introduced a better place to update the lastacc time value in latency --- source/net/yacy/crawler/data/Latency.java | 38 +++++++++++++------ .../net/yacy/crawler/retrieval/FTPLoader.java | 2 + .../yacy/crawler/retrieval/HTTPLoader.java | 3 +- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/source/net/yacy/crawler/data/Latency.java b/source/net/yacy/crawler/data/Latency.java index daecaec3f..9fae6ce8f 100644 --- a/source/net/yacy/crawler/data/Latency.java +++ b/source/net/yacy/crawler/data/Latency.java @@ -46,38 +46,57 @@ public class Latency { private static final ConcurrentHashMap map = new ConcurrentHashMap(); /** - * update the latency entry after a host was accessed to load a file + * update the latency entry after a host was selected for queueing into the loader + * @param url + * @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist + */ + public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) { + final String host = url.getHost(); + if (host == null) return; + String hosthash = url.hosthash(); + Host h = map.get(hosthash); + if (h == null) { + h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay); + if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear(); + map.put(hosthash, h); + } + } + + /** + * update the latency entry before a host is accessed * @param url * @param time the time to load the file in milliseconds */ - public static void updateAfterLoad(final DigestURI url, final long time) { + public static void updateBeforeLoad(final DigestURI url) { final String host = url.getHost(); if (host == null) return; String hosthash = url.hosthash(); Host h = map.get(hosthash); if (h == null) { - h = new Host(host, time); + h = new Host(host, 500, 0); if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear(); map.put(hosthash, h); } else { - h.update(time); + h.update(); } } /** - * update the latency entry after a host was selected for queueing into the loader + * update the latency entry after a host was accessed to load a file * @param url - * @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist + * @param time the time to load the file in milliseconds */ - public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) { + public static void updateAfterLoad(final DigestURI url, final long time) { final String host = url.getHost(); if (host == null) return; String hosthash = url.hosthash(); Host h = map.get(hosthash); if (h == null) { - h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay); + h = new Host(host, time, 0); if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear(); map.put(hosthash, h); + } else { + h.update(time); } } @@ -252,9 +271,6 @@ public class Latency { private AtomicInteger count; private final String host; private long robotsMinDelay; - private Host(final String host, final long time) { - this(host, time, 0); - } private Host(final String host, final long time, long robotsMinDelay) { this.host = host; this.timeacc = new AtomicLong(time); diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java index 93fb0ee2c..414a247a8 100644 --- a/source/net/yacy/crawler/retrieval/FTPLoader.java +++ b/source/net/yacy/crawler/retrieval/FTPLoader.java @@ -69,6 +69,8 @@ public class FTPLoader { */ public Response load(final Request request, final boolean acceptOnlyParseable) throws IOException { + Latency.updateBeforeLoad(request.url()); + final long start = System.currentTimeMillis(); final DigestURI entryUrl = request.url(); final String fullPath = getPath(entryUrl); diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index 1550c56d0..3e515c0a7 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -72,9 +72,10 @@ public final class HTTPLoader { } public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType) throws IOException { + Latency.updateBeforeLoad(entry.url()); final long start = System.currentTimeMillis(); final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType); - if (!doc.fromCache()) Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start); + Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start); return doc; }