From feca150672434a94b232708798d2bfb18d4c9e89 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Mon, 25 Nov 2024 00:30:36 +0100 Subject: [PATCH] Automatically adjust crawling load limit to the local machine cpu cores The settings in the default configuration file are historic. Many machines have many more CPU cores today, so auto-scaling the load limits to the local hardware is better. --- source/net/yacy/htroot/Crawler_p.java | 3 ++- source/net/yacy/search/Switchboard.java | 7 +++++++ source/net/yacy/search/SwitchboardConstants.java | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/source/net/yacy/htroot/Crawler_p.java b/source/net/yacy/htroot/Crawler_p.java index 8c898f558..fa94dbc6f 100644 --- a/source/net/yacy/htroot/Crawler_p.java +++ b/source/net/yacy/htroot/Crawler_p.java @@ -803,7 +803,8 @@ public class Crawler_p { // we must increase the load limit because a conservative load limit will prevent a high crawling speed // however this must not cause that the load limit is reduced again because that may go against the users requirements // in case they set the limit themself, see https://github.com/yacy/yacy_search_server/issues/363 - float loadprereq = wantedPPM <= 10 ? 1.0f : wantedPPM <= 100 ? 2.0f : wantedPPM >= 1000 ? 8.0f : 3.0f; + float numberOfCores2 = 2.0f * (float) Runtime.getRuntime().availableProcessors(); + float loadprereq = wantedPPM <= 10 ? 1.0f : wantedPPM <= 100 ? 2.0f : wantedPPM >= 1000 ? 
numberOfCores2 : 3.0f; loadprereq = Math.max(loadprereq, sb.getConfigFloat(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL_LOADPREREQ, loadprereq)); BusyThread thread; diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 20778899c..30cc728dd 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -546,6 +546,13 @@ public final class Switchboard extends serverSwitch { solrWebgraphConfigurationWork.commit(); } catch (final IOException e) {ConcurrentLog.logException(e);} + // define load limitation according to current number of cpu cores + if (this.firstInit) { + float numberOfCores2 = 2.0f * (float) Runtime.getRuntime().availableProcessors(); + sb.setConfig(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL_LOADPREREQ, numberOfCores2); + sb.setConfig(SwitchboardConstants.SURROGATES_LOADPREREQ, numberOfCores2); + } + // define boosts Ranking.setMinTokenLen(this.getConfigInt(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, 3)); Ranking.setQuantRate(this.getConfigFloat(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, 0.5f)); diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java index 39b2d2638..3cb82d58b 100644 --- a/source/net/yacy/search/SwitchboardConstants.java +++ b/source/net/yacy/search/SwitchboardConstants.java @@ -283,13 +283,13 @@ public final class SwitchboardConstants { public static final String REMOTESEARCH_MAXLOAD_RWI = "remotesearch.maxload.rwi"; /** Default maximum system load allowing remote RWI searches */ - public static final float REMOTESEARCH_MAXLOAD_RWI_DEFAULT = 8.0f; + public static final float REMOTESEARCH_MAXLOAD_RWI_DEFAULT = 2.0f * (float) Runtime.getRuntime().availableProcessors(); /** Setting key to configure the maximum system load allowing remote Solr searches */ public static final String REMOTESEARCH_MAXLOAD_SOLR = "remotesearch.maxload.solr"; /** Default 
maximum system load allowing remote Solr searches */ - public static final float REMOTESEARCH_MAXLOAD_SOLR_DEFAULT = 4.0f; + public static final float REMOTESEARCH_MAXLOAD_SOLR_DEFAULT = (float) Runtime.getRuntime().availableProcessors(); /** Key of the setting controlling whether https should be preferred for remote searches, when available on the target peer */ public static final String REMOTESEARCH_HTTPS_PREFERRED = "remotesearch.https.preferred";