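The new crawling factors (crawler.defaultAverageLatency, crawler.latencyFactor, crawler.MaxSameHostInQueue) can now be changed at runtime: they are read from the Switchboard configuration at each use instead of being copied into static Latency fields once at startup.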

pull/1/head
Michael Peter Christen 11 years ago
parent be5e808236
commit 0168f80c28

@ -41,17 +41,9 @@ import net.yacy.search.Switchboard;
public class Latency {
private final static int DEFAULT_AVERAGE_LATENCY = 500;
private final static int DEFAULT_MAX_SAME_HOST_IN_QUEUE = 20;
private final static float DEFAULT_LATENCY_FACTOR = 0.5f;
// the map is a mapping from host names to host configurations
private static final int mapMaxSize = 1000;
private static final ConcurrentHashMap<String, Host> map = new ConcurrentHashMap<String, Host>();
public static int defaultAverageLatency = DEFAULT_AVERAGE_LATENCY;
public static int MaxSameHostInQueue = DEFAULT_MAX_SAME_HOST_IN_QUEUE;
public static float latencyFactor = DEFAULT_LATENCY_FACTOR;
/**
* update the latency entry after a host was selected for queueing into the loader
@ -64,7 +56,7 @@ public class Latency {
String hosthash = url.hosthash();
Host h = map.get(hosthash);
if (h == null) {
h = new Host(host, defaultAverageLatency, robotsCrawlDelay);
h = new Host(host, Switchboard.getSwitchboard().getConfigInt("crawler.defaultAverageLatency", 500), robotsCrawlDelay);
if (map.size() > mapMaxSize || MemoryControl.shortStatus()) map.clear();
map.put(hosthash, h);
}
@ -168,10 +160,10 @@ public class Latency {
// use the access latency as rule how fast we can access the server
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
waiting = Math.max(waiting, (int) (host.average() * latencyFactor));
waiting = Math.max(waiting, (int) (host.average() * Switchboard.getSwitchboard().getConfigFloat("crawler.latencyFactor", 0.5f)));
// if the number of same hosts as in the url in the loading queue is greater than MaxSameHostInQueue, then increase waiting
if (Switchboard.getSwitchboard().crawlQueues.hostcount(hostname) > MaxSameHostInQueue) waiting += 5000;
if (Switchboard.getSwitchboard().crawlQueues.hostcount(hostname) > Switchboard.getSwitchboard().getConfigInt("crawler.MaxSameHostInQueue", 20)) waiting += 5000;
// the time since last access to the domain is the basis of the remaining calculation
final int timeSinceLastAccess = (int) (System.currentTimeMillis() - host.lastacc());
@ -211,10 +203,10 @@ public class Latency {
if (!local) waiting += host.flux(waiting);
// use the access latency as rule how fast we can access the server
waiting = Math.max(waiting, (int) (host.average() * latencyFactor));
waiting = Math.max(waiting, (int) (host.average() * Switchboard.getSwitchboard().getConfigFloat("crawler.latencyFactor", 0.5f)));
// if the number of same hosts as in the url in the loading queue is greater than MaxSameHostInQueue, then increase waiting
if (Switchboard.getSwitchboard().crawlQueues.hostcount(url.getHost()) > MaxSameHostInQueue) waiting += 5000;
if (Switchboard.getSwitchboard().crawlQueues.hostcount(url.getHost()) > Switchboard.getSwitchboard().getConfigInt("crawler.MaxSameHostInQueue", 20)) waiting += 5000;
// the time since last access to the domain is the basis of the remaining calculation
final int timeSinceLastAccess = (int) (System.currentTimeMillis() - host.lastacc());
@ -252,11 +244,11 @@ public class Latency {
// this applies also to localhost, but differently, because it is not necessary to
// consider so many external accesses
s.append(", host.average = ").append(host.average());
waiting = Math.max(waiting, (int) (host.average() * latencyFactor));
waiting = Math.max(waiting, (int) (host.average() * Switchboard.getSwitchboard().getConfigFloat("crawler.latencyFactor", 0.5f)));
// if the number of same hosts as in the url in the loading queue is greater than MaxSameHostInQueue, then increase waiting
int hostcount = Switchboard.getSwitchboard().crawlQueues.hostcount(url.getHost());
if (hostcount > MaxSameHostInQueue) {
if (hostcount > Switchboard.getSwitchboard().getConfigInt("crawler.MaxSameHostInQueue", 20)) {
s.append(", hostcount = ").append(hostcount);
waiting += 5000;
}

@ -825,9 +825,6 @@ public final class Switchboard extends serverSwitch {
getDataPath());
OAIListFriendsLoader.init(this.loader, oaiFriends, ClientIdentification.yacyInternetCrawlerAgent);
this.crawlQueues = new CrawlQueues(this, this.queuesRoot);
Latency.defaultAverageLatency = this.getConfigInt("crawler.defaultAverageLatency", 500);
Latency.latencyFactor = this.getConfigFloat("crawler.latencyFactor", 0.5f);
Latency.MaxSameHostInQueue = this.getConfigInt("crawler.MaxSameHostInQueue", 20);
// on startup, resume all crawls
setConfig(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL + "_isPaused", "false");

Loading…
Cancel
Save