diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html index 7d81a4893..1df797804 100644 --- a/htroot/IndexCreate_p.html +++ b/htroot/IndexCreate_p.html @@ -23,6 +23,34 @@ Value Description + + Starting Point: + + + + + + + + + + + + + + + +
From File:
From URL: + + +
+ + + Existing start URLs are re-crawled. + Other already visited URLs are sorted out as "double". + A complete re-crawl will be available soon. + + Crawling Depth: @@ -164,31 +192,14 @@ --> - Starting Point: + Wanted Performance: - - - - - - - - - - - - - - -
From File:
From URL: - - -
+ maximum   + custom: PPM   + optimal as background process - Existing start URLs are re-crawled. - Other already visited URLs are sorted out as "double". - A complete re-crawl will be available soon. + Set wanted level of computing power, used for this and other running crawl tasks. (PPM = pages per minute) diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index 2a356c0b8..8777e60f9 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -144,13 +144,21 @@ public class IndexCreate_p { prop.put("indexingTextChecked", env.getConfig("indexText", "").equals("true") ? 1 : 0); prop.put("indexingMediaChecked", env.getConfig("indexMedia", "").equals("true") ? 1 : 0); prop.put("crawlOrderChecked", env.getConfig("crawlOrder", "").equals("true") ? 1 : 0); - long busySleep = Integer.parseInt(env.getConfig("62_remotetriggeredcrawl_busysleep", "100")); - if (busySleep < 100) { - busySleep = 100; - env.setConfig("62_remotetriggeredcrawl_busysleep", Long.toString(busySleep)); + + long LCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100")); + int LCppm = (int) (60000L / LCbusySleep); + prop.put("crawlingSpeedMaxChecked", (LCppm >= 1000) ? 1 : 0); + prop.put("crawlingSpeedCustChecked", ((LCppm > 10) && (LCppm < 1000)) ? 1 : 0); + prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? 1 : 0); + prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : ""); + + long RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100")); + if (RTCbusySleep < 100) { + RTCbusySleep = 100; + env.setConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, Long.toString(RTCbusySleep)); } if (env.getConfig("crawlResponse", "").equals("true")) { - if (busySleep <= 100) { + if (RTCbusySleep <= 100) { prop.put("acceptCrawlMaxChecked", 1); prop.put("acceptCrawlLimitedChecked", 0); prop.put("acceptCrawlDeniedChecked", 0); @@ -164,9 +172,10 @@ public class IndexCreate_p { prop.put("acceptCrawlLimitedChecked", 0); prop.put("acceptCrawlDeniedChecked", 1); } - int ppm = (int) (60000L / busySleep); - if (ppm > 60) ppm = 60; - prop.put("PPM", ppm); + int RTCppm = (int) (60000L / RTCbusySleep); + if (RTCppm > 60) RTCppm = 60; + prop.put("PPM", RTCppm); + prop.put("xsstopwChecked", env.getConfig("xsstopw", "").equals("true") ? 1 : 0); prop.put("xdstopwChecked", env.getConfig("xdstopw", "").equals("true") ? 1 : 0); prop.put("xpstopwChecked", env.getConfig("xpstopw", "").equals("true") ? 1 : 0); diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html index 1080ae0d0..a77af9fb9 100644 --- a/htroot/PerformanceQueues_p.html +++ b/htroot/PerformanceQueues_p.html @@ -51,7 +51,7 @@ #[memusepercycle]# milliseconds milliseconds - kbytes + kbytes #[longdescr]# #{/table}# diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index 0107bd20b..c41d269c9 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -135,7 +135,8 @@ public class PerformanceQueues_p { idlesleep = post.getLong(threadName + "_idlesleep", 1000); busysleep = post.getLong(threadName + "_busysleep", 100); memprereq = post.getLong(threadName + "_memprereq", 0) * 1024; - + if (memprereq == 0) memprereq = sb.getConfigLong(threadName + "_memprereq", 0); + // check values to prevent short-cut loops if (idlesleep < 1000) idlesleep = 1000; if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; } diff --git a/htroot/WatchCrawler_p.html b/htroot/WatchCrawler_p.html index d760e4673..64099415b 100644 --- a/htroot/WatchCrawler_p.html +++ b/htroot/WatchCrawler_p.html @@ -54,6 +54,20 @@ + + + + + + + + + + + + +
Speed
PPM
+ diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java index 3d6a30a14..af43ccf88 100644 --- a/htroot/WatchCrawler_p.java +++ b/htroot/WatchCrawler_p.java @@ -143,6 +143,8 @@ public class WatchCrawler_p { boolean xpstopw = post.get("xpstopw", "off").equals("on"); env.setConfig("xpstopw", (xpstopw) ? "true" : "false"); + setPerformance(switchboard, post); + String crawlingMode = post.get("crawlingMode","url"); if (crawlingMode.equals("url")) { // getting the crawljob start url @@ -321,6 +323,10 @@ public class WatchCrawler_p { } } } + + if (post.containsKey("crawlingPerformance")) { + setPerformance(switchboard, post); + } } // crawl profiles @@ -371,6 +377,14 @@ public class WatchCrawler_p { } prop.put("crawlProfiles", count); + // performance settings + long LCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP, "100")); + int LCppm = (int) (60000L / LCbusySleep); + prop.put("crawlingSpeedMaxChecked", (LCppm >= 1000) ? 1 : 0); + prop.put("crawlingSpeedCustChecked", ((LCppm > 10) && (LCppm < 1000)) ? 1 : 0); + prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? 1 : 0); + prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : ""); + // return rewrite properties return prop; } @@ -385,4 +399,15 @@ public class WatchCrawler_p { return -1; } + private static void setPerformance(plasmaSwitchboard sb, serverObjects post) { + String crawlingPerformance = post.get("crawlingPerformance","custom"); + int wantedPPM = 1000; + try { + wantedPPM = Integer.parseInt(post.get("customPPM","1000")); + } catch (NumberFormatException e) {} + if (crawlingPerformance.equals("minimum")) wantedPPM = 10; + if (crawlingPerformance.equals("maximum")) wantedPPM = 1000; + sb.setPerformance(wantedPPM); + } + } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index e9a8c444a..d3816746f 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -309,6 +309,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String PEER_PING_METHOD_START = "peerPing"; public static final String PEER_PING_METHOD_JOBCOUNT = null; public static final String PEER_PING_METHOD_FREEMEM = null; + public static final String PEER_PING_IDLESLEEP = "30_peerping_idlesleep"; + public static final String PEER_PING_BUSYSLEEP = "30_peerping_busysleep"; // 40_peerseedcycle /** @@ -319,6 +321,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String SEED_UPLOAD_METHOD_START = "publishSeedList"; public static final String SEED_UPLOAD_METHOD_JOBCOUNT = null; public static final String SEED_UPLOAD_METHOD_FREEMEM = null; + public static final String SEED_UPLOAD_IDLESLEEP = "40_peerseedcycle_idlesleep"; + public static final String SEED_UPLOAD_BUSYSLEEP = "40_peerseedcycle_busysleep"; // 50_localcrawl /** @@ -332,6 +336,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CRAWLJOB_LOCAL_CRAWL_METHOD_START = "coreCrawlJob"; public static final String CRAWLJOB_LOCAL_CRAWL_METHOD_JOBCOUNT = "coreCrawlJobSize"; public static final String CRAWLJOB_LOCAL_CRAWL_METHOD_FREEMEM = null; + public static final String CRAWLJOB_LOCAL_CRAWL_IDLESLEEP = "50_localcrawl_idlesleep"; + public static final String CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP = "50_localcrawl_busysleep"; // 61_globalcawltrigger /** @@ -345,6 +351,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER_METHOD_START = "limitCrawlTriggerJob"; public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER_METHOD_JOBCOUNT = "limitCrawlTriggerJobSize"; public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER_METHOD_FREEMEM = null; + public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER_IDLESLEEP = "61_globalcrawltrigger_idlesleep"; + public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER_BUSYSLEEP = "61_globalcrawltrigger_busysleep"; // 62_remotetriggeredcrawl /** @@ -355,6 +363,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START = "remoteTriggeredCrawlJob"; public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT = "remoteTriggeredCrawlJobSize"; public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM = null; + public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP = "62_remotetriggeredcrawl_idlesleep"; + public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP = "62_remotetriggeredcrawl_busysleep"; // 70_cachemanager /** @@ -368,6 +378,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String PROXY_CACHE_ENQUEUE_METHOD_START = "htEntryStoreJob"; public static final String PROXY_CACHE_ENQUEUE_METHOD_JOBCOUNT = "htEntrySize"; public static final String PROXY_CACHE_ENQUEUE_METHOD_FREEMEM = null; + public static final String PROXY_CACHE_ENQUEUE_IDLESLEEP = "70_cachemanager_idlesleep"; + public static final String PROXY_CACHE_ENQUEUE_BUSYSLEEP = "70_cachemanager_busysleep"; // 80_indexing /** @@ -393,6 +405,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CRAWLSTACK_METHOD_START = "job"; public static final String CRAWLSTACK_METHOD_JOBCOUNT = "size"; public static final String CRAWLSTACK_METHOD_FREEMEM = null; + public static final String CRAWLSTACK_IDLESLEEP = "82_crawlstack_idlesleep"; + public static final String CRAWLSTACK_BUSYSLEEP = "82_crawlstack_busysleep"; // 90_cleanup /** @@ -403,6 +417,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CLEANUP_METHOD_START = "cleanupJob"; public static final String CLEANUP_METHOD_JOBCOUNT = "cleanupJobSize"; public static final String CLEANUP_METHOD_FREEMEM = null; + public static final String CLEANUP_IDLESLEEP = "90_cleanup_idlesleep"; + public static final String CLEANUP_BUSYSLEEP = "90_cleanup_busysleep"; ////////////////////////////////////////////////////////////////////////////////////////////// // RAM Cache settings @@ -2926,21 +2942,38 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // 1000 <= wantedPPM : maximum performance if (wantedPPM <= 10) wantedPPM = 10; if (wantedPPM >= 1000) wantedPPM = 1000; - int newBusySleep = 60000 / wantedPPM; + int newBusySleep = 60000 / wantedPPM; // for wantedPPM = 10: 6000; for wantedPPM = 1000: 60 + + serverThread thread; + + thread = getThread(INDEX_DIST); + setConfig(INDEX_DIST_BUSYSLEEP , thread.setBusySleep(Math.max(2000, thread.setBusySleep(newBusySleep * 2)))); + thread.setIdleSleep(30000); + + thread = getThread(CRAWLJOB_LOCAL_CRAWL); + setConfig(CRAWLJOB_LOCAL_CRAWL_BUSYSLEEP , thread.setBusySleep(newBusySleep)); + thread.setIdleSleep(1000); + + thread = getThread(CRAWLJOB_GLOBAL_CRAWL_TRIGGER); + setConfig(CRAWLJOB_GLOBAL_CRAWL_TRIGGER_BUSYSLEEP , thread.setBusySleep(Math.max(1000, newBusySleep * 3))); + thread.setIdleSleep(10000); - serverThread CRAWLSTACK_thread = getThread(CRAWLSTACK); - serverThread INDEXER_thread = getThread(INDEXER); - serverThread PROXY_CACHE_ENQUEUE_thread = getThread(PROXY_CACHE_ENQUEUE); - serverThread CRAWLJOB_REMOTE_TRIGGERED_CRAWL_thread = getThread(CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - serverThread CRAWLJOB_GLOBAL_CRAWL_TRIGGER_thread = getThread(CRAWLJOB_GLOBAL_CRAWL_TRIGGER); - serverThread CRAWLJOB_LOCAL_CRAWL_thread = getThread(CRAWLJOB_LOCAL_CRAWL); - serverThread INDEX_DIST_thread = getThread(INDEX_DIST); + thread = getThread(CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + setConfig(CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP , thread.setBusySleep(newBusySleep * 10)); + thread.setIdleSleep(10000); + thread = getThread(PROXY_CACHE_ENQUEUE); + setConfig(PROXY_CACHE_ENQUEUE_BUSYSLEEP , thread.setBusySleep(0)); + thread.setIdleSleep(1000); - CRAWLJOB_LOCAL_CRAWL_thread.setBusySleep(newBusySleep); + thread = getThread(INDEXER); + setConfig(INDEXER_BUSYSLEEP , thread.setBusySleep(newBusySleep / 4)); + thread.setIdleSleep(1000); + thread = getThread(CRAWLSTACK); + setConfig(CRAWLSTACK_BUSYSLEEP , thread.setBusySleep(0)); + thread.setIdleSleep(5000); - setConfig(CRAWLJOB_LOCAL_CRAWL, Long.toString(newBusySleep)); } public void startTransferWholeIndex(yacySeed seed, boolean delete) { diff --git a/source/de/anomic/server/serverAbstractThread.java b/source/de/anomic/server/serverAbstractThread.java index 569934929..1ca6d07a8 100644 --- a/source/de/anomic/server/serverAbstractThread.java +++ b/source/de/anomic/server/serverAbstractThread.java @@ -101,14 +101,16 @@ public abstract class serverAbstractThread extends Thread implements serverThrea startup = milliseconds; } - public final void setIdleSleep(long milliseconds) { + public final long setIdleSleep(long milliseconds) { // sets a sleep time for pauses between two jobs idlePause = milliseconds; + return milliseconds; } - public final void setBusySleep(long milliseconds) { + public final long setBusySleep(long milliseconds) { // sets a sleep time for pauses between two jobs busyPause = milliseconds; + return milliseconds; } public void setMemPreReqisite(long freeBytes) { diff --git a/source/de/anomic/server/serverThread.java b/source/de/anomic/server/serverThread.java index f6ad9d8fe..228a4ec38 100644 --- a/source/de/anomic/server/serverThread.java +++ b/source/de/anomic/server/serverThread.java @@ -59,10 +59,10 @@ public interface serverThread { public void setStartupSleep(long milliseconds); // sets a sleep time before execution of the job-loop - public void setIdleSleep(long milliseconds); + public long setIdleSleep(long milliseconds); // sets a sleep time for pauses between two jobs if the job returns false (idle) - public void setBusySleep(long milliseconds); + public long setBusySleep(long milliseconds); // sets a sleep time for pauses between two jobs if the job returns true (busy) public void setMemPreReqisite(long freeBytes);