introduced a better place to update the lastacc time value in latency

pull/1/head
Michael Peter Christen 12 years ago
parent 864abcd33d
commit a3cd3852ab

@@ -46,38 +46,57 @@ public class Latency {
private static final ConcurrentHashMap<String, Host> map = new ConcurrentHashMap<String, Host>();
/**
* update the latency entry after a host was accessed to load a file
* update the latency entry after a host was selected for queueing into the loader
* @param url
* @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist
*/
public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) {
final String host = url.getHost();
// URLs without a host (e.g. malformed or file-like URLs) carry no latency record
if (host == null) return;
String hosthash = url.hosthash();
Host h = map.get(hosthash);
// Only first sight of a host creates an entry, seeded with DEFAULT_AVERAGE and
// the robots.txt crawl-delay; existing entries are deliberately left untouched here.
if (h == null) {
h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay);
// Crude eviction: drop the whole cache when it grows past 1000 hosts or memory
// runs short, rather than evicting individual entries.
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
// NOTE(review): get/put is a check-then-act on the ConcurrentHashMap, so two
// threads may each build a Host and the later put wins — presumably acceptable
// since both would hold equivalent seed values; confirm if exact counts matter.
map.put(hosthash, h);
}
}
/**
* update the latency entry before a host is accessed
* @param url
* @param time the time to load the file in milliseconds
*/
public static void updateAfterLoad(final DigestURI url, final long time) {
public static void updateBeforeLoad(final DigestURI url) {
final String host = url.getHost();
if (host == null) return;
String hosthash = url.hosthash();
Host h = map.get(hosthash);
if (h == null) {
h = new Host(host, time);
h = new Host(host, 500, 0);
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
map.put(hosthash, h);
} else {
h.update(time);
h.update();
}
}
/**
* update the latency entry after a host was selected for queueing into the loader
* update the latency entry after a host was accessed to load a file
* @param url
* @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist
* @param time the time to load the file in milliseconds
*/
public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) {
public static void updateAfterLoad(final DigestURI url, final long time) {
final String host = url.getHost();
if (host == null) return;
String hosthash = url.hosthash();
Host h = map.get(hosthash);
if (h == null) {
h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay);
h = new Host(host, time, 0);
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
map.put(hosthash, h);
} else {
h.update(time);
}
}
@@ -252,9 +271,6 @@ public class Latency {
private AtomicInteger count;
private final String host;
private long robotsMinDelay;
private Host(final String host, final long time) {
this(host, time, 0);
}
private Host(final String host, final long time, long robotsMinDelay) {
this.host = host;
this.timeacc = new AtomicLong(time);

@@ -69,6 +69,8 @@ public class FTPLoader {
*/
public Response load(final Request request, final boolean acceptOnlyParseable) throws IOException {
Latency.updateBeforeLoad(request.url());
final long start = System.currentTimeMillis();
final DigestURI entryUrl = request.url();
final String fullPath = getPath(entryUrl);

@@ -72,9 +72,10 @@ public final class HTTPLoader {
}
public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType) throws IOException {
Latency.updateBeforeLoad(entry.url());
final long start = System.currentTimeMillis();
final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType);
if (!doc.fromCache()) Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
return doc;
}

Loading…
Cancel
Save