|
|
|
@ -46,38 +46,57 @@ public class Latency {
|
|
|
|
|
// latency entries of the hosts seen so far, keyed by the host hash of the url;
// the update methods below empty the whole table when it exceeds 1000 entries or memory runs short
private static final ConcurrentHashMap<String, Host> map = new ConcurrentHashMap<String, Host>();
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* update the latency entry after a host was accessed to load a file
|
|
|
|
|
* update the latency entry after a host was selected for queueing into the loader
|
|
|
|
|
* @param url
|
|
|
|
|
* @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist
|
|
|
|
|
*/
|
|
|
|
|
public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) {
|
|
|
|
|
final String host = url.getHost();
|
|
|
|
|
if (host == null) return;
|
|
|
|
|
String hosthash = url.hosthash();
|
|
|
|
|
Host h = map.get(hosthash);
|
|
|
|
|
if (h == null) {
|
|
|
|
|
h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay);
|
|
|
|
|
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
|
|
|
|
|
map.put(hosthash, h);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* update the latency entry before a host is accessed
|
|
|
|
|
* @param url
|
|
|
|
|
* @param time the time to load the file in milliseconds
|
|
|
|
|
*/
|
|
|
|
|
public static void updateAfterLoad(final DigestURI url, final long time) {
|
|
|
|
|
public static void updateBeforeLoad(final DigestURI url) {
|
|
|
|
|
final String host = url.getHost();
|
|
|
|
|
if (host == null) return;
|
|
|
|
|
String hosthash = url.hosthash();
|
|
|
|
|
Host h = map.get(hosthash);
|
|
|
|
|
if (h == null) {
|
|
|
|
|
h = new Host(host, time);
|
|
|
|
|
h = new Host(host, 500, 0);
|
|
|
|
|
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
|
|
|
|
|
map.put(hosthash, h);
|
|
|
|
|
} else {
|
|
|
|
|
h.update(time);
|
|
|
|
|
h.update();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* update the latency entry after a host was selected for queueing into the loader
|
|
|
|
|
* update the latency entry after a host was accessed to load a file
|
|
|
|
|
* @param url
|
|
|
|
|
* @param robotsCrawlDelay the crawl-delay given by the robots; 0 if not exist
|
|
|
|
|
* @param time the time to load the file in milliseconds
|
|
|
|
|
*/
|
|
|
|
|
public static void updateAfterSelection(final DigestURI url, final long robotsCrawlDelay) {
|
|
|
|
|
public static void updateAfterLoad(final DigestURI url, final long time) {
|
|
|
|
|
final String host = url.getHost();
|
|
|
|
|
if (host == null) return;
|
|
|
|
|
String hosthash = url.hosthash();
|
|
|
|
|
Host h = map.get(hosthash);
|
|
|
|
|
if (h == null) {
|
|
|
|
|
h = new Host(host, DEFAULT_AVERAGE, robotsCrawlDelay);
|
|
|
|
|
h = new Host(host, time, 0);
|
|
|
|
|
if (map.size() > 1000 || MemoryControl.shortStatus()) map.clear();
|
|
|
|
|
map.put(hosthash, h);
|
|
|
|
|
} else {
|
|
|
|
|
h.update(time);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -252,9 +271,6 @@ public class Latency {
|
|
|
|
|
// NOTE(review): presumably the number of accesses recorded for this host — confirm against update()
private AtomicInteger count;
|
|
|
|
|
// name of the host this entry belongs to; assigned once in the constructor
private final String host;
|
|
|
|
|
// presumably the crawl delay requested by the robots of this host, 0 if none — TODO confirm unit and where it is read
private long robotsMinDelay;
|
|
|
|
|
/**
 * creates an entry for a host that has no robots delay (the delay is passed on as 0)
 * @param host the name of the host
 * @param time the initial time value of the entry
 */
private Host(final String host, final long time) {
    this(host, time, 0L);
}
|
|
|
|
|
private Host(final String host, final long time, long robotsMinDelay) {
|
|
|
|
|
this.host = host;
|
|
|
|
|
this.timeacc = new AtomicLong(time);
|
|
|
|
|