From b73557ed2d3d154ced0f9c34524c2dcf33a3d1f4 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 20 Jul 2005 13:03:41 +0000 Subject: [PATCH] better assortment monitoring and enhanced profile menu git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@416 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Performance_p.html | 60 +++++++++---------- htroot/Performance_p.java | 19 ++++-- .../de/anomic/plasma/plasmaSnippetCache.java | 5 ++ .../de/anomic/plasma/plasmaSwitchboard.java | 22 +++---- .../plasmaWordIndexAssortmentCluster.java | 18 +++++- .../anomic/plasma/plasmaWordIndexCache.java | 59 +----------------- .../anomic/server/serverAbstractSwitch.java | 6 +- .../anomic/server/serverAbstractThread.java | 8 ++- .../server/serverPortForwardingSch.java | 2 +- source/de/anomic/server/serverSwitch.java | 1 + source/de/anomic/server/serverThread.java | 5 +- source/yacy.java | 2 +- 12 files changed, 94 insertions(+), 113 deletions(-) diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html index 2e05ce9ad..79eb7a57c 100644 --- a/htroot/Performance_p.html +++ b/htroot/Performance_p.html @@ -22,8 +22,8 @@ Idle
Cycles Busy
Cycles Short Mem
Cycles -Sleep Time
per Cycle
(milliseconds) -Exec Time
per Busy-Cycle
(milliseconds) +Sleep Time
per Cycle
(millis) +Exec Time
per Busy-Cycle
(millis) Delay between
idle loops Delay between
busy loops Minimum of
Required Memory @@ -57,17 +57,13 @@ Changes take effect immediately -

+ +

Indexing Cache Settings:
- - - - - - +
@@ -85,13 +81,6 @@ Changes take effect immediately If this is a big number, it shows that the caching works efficiently. - #{assortmentCluster}# - - - - - - #{/assortmentCluster}# @@ -101,27 +90,16 @@ Changes take effect immediately flushed to disc; this may last some minutes. - - - - -
Words in RAM Cache: #[wordCacheRAMSize]#
Assortment #[assortmentSlot]#:#[assortmentSize]#Buffer for words appearing exactly on #[assortmentSlot]# different pages.
Maximum number of Word Caches:
+ + Changes take effect immediately
+

+

Thread pool settings:
@@ -149,6 +127,26 @@ Changes take effect immediately

+

+

Index Assortments:
+ + #{assortmentCluster}# + + + + + + + + + + + + #{/assortmentCluster}# +
Assortments #[assortmentSlots]#:#[assortmentSizeA]##[assortmentSizeB]##[assortmentSizeC]##[assortmentSizeD]##[assortmentSizeE]##[assortmentSizeF]##[assortmentSizeG]##[assortmentSizeH]#
+

+ + #[footer]# diff --git a/htroot/Performance_p.java b/htroot/Performance_p.java index 67d2c5725..8243d206b 100644 --- a/htroot/Performance_p.java +++ b/htroot/Performance_p.java @@ -91,8 +91,8 @@ public class Performance_p { thread = switchboard.getThread(threadName); // set values to templates - prop.put("table_" + c + "_threadname", threadName); - prop.put("table_" + c + "_shortdescr", thread.getShortDescription()); + //prop.put("table_" + c + "_threadname", threadName); + prop.put("table_" + c + "_shortdescr", (thread.getMonitorURL() == null) ? thread.getShortDescription() : "" + thread.getShortDescription() + ""); prop.put("table_" + c + "_longdescr", thread.getLongDescription()); queuesize = thread.getJobCount(); prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : ("" + queuesize)); @@ -186,11 +186,18 @@ public class Performance_p { prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000")); int[] asizes = switchboard.wordIndex.assortmentSizes(); - for (int i = 0; i < asizes.length; i++) { - prop.put("assortmentCluster_" + i + "_assortmentSlot", i + 1); - prop.put("assortmentCluster_" + i + "_assortmentSize", asizes[i]); + for (int i = 0; i < asizes.length; i += 8) { + prop.put("assortmentCluster_" + (i/8) + "_assortmentSlots", (i + 1) + "-" + (i + 8)); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeA", asizes[i]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeB", asizes[i + 1]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeC", asizes[i + 2]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeD", asizes[i + 3]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeE", asizes[i + 4]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeF", asizes[i + 5]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeG", asizes[i + 6]); + prop.put("assortmentCluster_" + (i/8) + "_assortmentSizeH", asizes[i + 7]); } - prop.put("assortmentCluster", asizes.length); 
+ prop.put("assortmentCluster", asizes.length / 8); // table thread pool settings GenericObjectPool.Config crawlerPoolConfig = switchboard.cacheLoader.getPoolConfig(); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 54d60a7f2..c3695f5ee 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -202,6 +202,7 @@ public class plasmaSnippetCache { } private String computeSnippet(String[] sentences, Set queryhashes, int minLength, int maxLength) { + try { if ((sentences == null) || (sentences.length == 0)) return null; if ((queryhashes == null) || (queryhashes.size() == 0)) return null; kelondroMScoreCluster hitTable = new kelondroMScoreCluster(); @@ -297,6 +298,10 @@ public class plasmaSnippetCache { if (maxLength < 20) maxLength = 20; String nextSnippet = computeSnippet(sentences, remaininghashes, minLength, maxLength); return result + ((nextSnippet == null) ? "" : (" / " + nextSnippet)); + } catch (IndexOutOfBoundsException e) { + e.printStackTrace(); + return ""; + } } private HashMap hashSentence(String sentence) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 393b91bf3..267017125 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -351,36 +351,36 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser System.gc(); // help for profiler int indexing_cluster = Integer.parseInt(getConfig("80_indexing_cluster", "1")); if (indexing_cluster < 1) indexing_cluster = 1; - deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information" , + deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information", null, new serverInstantThread(this, "cleanupJob", "cleanupJobSize"), 10000); // all 5 Minutes - deployThread("80_indexing", 
"Parsing/Indexing", "thread that performes document parsing and indexing" , + deployThread("80_indexing", "Parsing/Indexing", "thread that performes document parsing and indexing", "/IndexCreateIndexingQueue_p.html", new serverInstantThread(this, "deQueue", "queueSize"), 10000); for (int i = 1; i < indexing_cluster; i++) { setConfig((i + 80) + "_indexing_idlesleep", getConfig("80_indexing_idlesleep", "")); setConfig((i + 80) + "_indexing_busysleep", getConfig("80_indexing_busysleep", "")); - deployThread((i + 80) + "_indexing", "Parsing/Indexing (cluster job)", "thread that performes document parsing and indexing" , + deployThread((i + 80) + "_indexing", "Parsing/Indexing (cluster job)", "thread that performes document parsing and indexing", null, new serverInstantThread(this, "deQueue", "queueSize"), 10000 + (i * 1000)); } - deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", + deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", null, new serverInstantThread(this, "htEntryStoreJob", "htEntrySize"), 10000); - deployThread("62_remotetriggeredcrawl", "Remote Crawl Job", "thread that performes a single crawl/indexing step triggered by a remote peer", + deployThread("62_remotetriggeredcrawl", "Remote Crawl Job", "thread that performes a single crawl/indexing step triggered by a remote peer", null, new serverInstantThread(this, "remoteTriggeredCrawlJob", "remoteTriggeredCrawlJobSize"), 30000); - deployThread("61_globalcrawltrigger", "Global Crawl Trigger", "thread that triggeres remote peers for crawling", + deployThread("61_globalcrawltrigger", "Global Crawl Trigger", "thread that triggeres remote peers for crawling", "/IndexCreateWWWGlobalQueue_p.html", new serverInstantThread(this, "limitCrawlTriggerJob", "limitCrawlTriggerJobSize"), 30000); // error here? 
- deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue", + deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue", "/IndexCreateWWWLocalQueue_p.html", new serverInstantThread(this, "coreCrawlJob", "coreCrawlJobSize"), 10000); - deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account", + deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account", null, new serverInstantThread(yc, "publishSeedList", null), 180000); serverInstantThread peerPing = null; - deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", + deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", null, peerPing = new serverInstantThread(yc, "peerPing", null), 2000); peerPing.setSyncObject(new Object()); indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log, getConfig("allowDistributeIndex", "false").equals("true")); indexDistribution.setCounts(100, 1, 8000); - deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", + deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", null, new serverInstantThread(indexDistribution, "job", null), 120000); // init migratiion from 0.37 -> 0.38 @@ -389,7 +389,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser if (classicCache.size() > 0) { setConfig("99_indexcachemigration_idlesleep" , 10000); setConfig("99_indexcachemigration_busysleep" , 40); - deployThread("99_indexcachemigration", "index cache 
migration", "migration of index cache data structures 0.37 -> 0.38", + deployThread("99_indexcachemigration", "index cache migration", "migration of index cache data structures 0.37 -> 0.38", null, new serverInstantThread(classicCache, "oneStepMigration", "size"), 30000); } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index b1e6a46f7..2a4fcd26b 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -118,13 +118,29 @@ public final class plasmaWordIndexAssortmentCluster { private void storeStretched(String wordHash, plasmaWordIndexEntryContainer newContainer) { // this stores the record and stretches the storage over // all the assortments that are necessary to fit in the record + // IMPORTANT: it must be ensured that the wordHash does not exist in the cluster before + // i.e. by calling removeFromAll if (newContainer.size() <= clusterCount) { storeForced(wordHash, newContainer); return; } + + // calculate appropriate cluster insert point + int clusterStart = clusterCount; + if ((((byte) wordHash.charAt(0)) & 1) == 1) { + // for every second hash, place the entries in the middle of the assortments + // this balances the entries within the assortments-cluster + int cap = clusterCapacity - newContainer.size() - 2 * clusterCount; + while (cap > 0) { + cap -= clusterStart; + clusterStart--; + } + } + + // do the insert plasmaWordIndexEntryContainer c; Iterator i = newContainer.entries(); - for (int j = clusterCount; j >= 1; j--) { + for (int j = clusterStart; j >= 1; j--) { c = new plasmaWordIndexEntryContainer(wordHash); for (int k = 0; k < j; k++) { if (i.hasNext()) { diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index b60e793df..e2faca2a0 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ 
b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -67,7 +67,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { private static final String oldSingletonFileName = "indexSingletons0.db"; private static final String newSingletonFileName = "indexAssortment001.db"; private static final String indexAssortmentClusterPath = "ACLUSTER"; - private static final int assortmentCount = 50; + private static final int assortmentCount = 64; private static final int ramCacheLimit = 200; @@ -242,63 +242,6 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } return urlCount; } - - /* - private void dump(int waitingSeconds) throws IOException { - log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)"); - File indexDumpFile = new File(databaseRoot, indexStackFileName); - if (indexDumpFile.exists()) indexDumpFile.delete(); - kelondroStack dumpStack = new kelondroStack(indexDumpFile, 1024, plasmaWordIndexAssortment.bufferStructureBasis); - long startTime = System.currentTimeMillis(); - long messageTime = System.currentTimeMillis() + 5000; - long wordsPerSecond = 0, wordcount = 0, urlcount = 0; - synchronized (cache) { - Iterator i = cache.entrySet().iterator(); - Map.Entry entry; - String wordHash; - plasmaWordIndexEntryContainer container; - long updateTime; - plasmaWordIndexEntry wordEntry; - byte[][] row = new byte[5][]; - System.gc(); // this can speed up the assortment, because they may better use the cache - while (i.hasNext()) { - // get entries - entry = (Map.Entry) i.next(); - wordHash = (String) entry.getKey(); - updateTime = getUpdateTime(wordHash); - container = (plasmaWordIndexEntryContainer) entry.getValue(); - - // put entries on stack - if (container != null) { - Iterator ci = container.entries(); - while (ci.hasNext()) { - wordEntry = (plasmaWordIndexEntry) ci.next(); - row[0] = wordHash.getBytes(); - row[1] = kelondroRecords.long2bytes(container.size(), 4); - row[2] 
= kelondroRecords.long2bytes(updateTime, 8); - row[3] = wordEntry.getUrlHash().getBytes(); - row[4] = wordEntry.toEncodedForm(true).getBytes(); - dumpStack.push(row); - urlcount++; - } - } - wordcount++; - i.remove(); // free some mem - - // write a log - if (System.currentTimeMillis() > messageTime) { - System.gc(); // for better statistic - wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime); - log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB"); - messageTime = System.currentTimeMillis() + 5000; - } - } - } - dumpStack.close(); - log.logSystem("dumped " + urlcount + " word/url relations in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds"); - } - - */ private long restoreStack() throws IOException { File indexDumpFile = new File(databaseRoot, indexStackFileName); diff --git a/source/de/anomic/server/serverAbstractSwitch.java b/source/de/anomic/server/serverAbstractSwitch.java index 2f1922ceb..4d69f8805 100644 --- a/source/de/anomic/server/serverAbstractSwitch.java +++ b/source/de/anomic/server/serverAbstractSwitch.java @@ -257,9 +257,10 @@ public abstract class serverAbstractSwitch implements serverSwitch { String threadName, String threadShortDescription, String threadLongDescription, + String threadMonitorURL, serverThread newThread, long startupDelay) { - deployThread(threadName, threadShortDescription, threadLongDescription, + deployThread(threadName, threadShortDescription, threadLongDescription, threadMonitorURL, newThread, startupDelay, Long.parseLong(getConfig(threadName + "_idlesleep" , "100")), Long.parseLong(getConfig(threadName + "_busysleep" , "1000")), @@ -270,6 +271,7 @@ public abstract class serverAbstractSwitch implements serverSwitch { String threadName, String threadShortDescription, String threadLongDescription, + String threadMonitorURL, serverThread 
newThread, long startupDelay, long initialIdleSleep, @@ -300,7 +302,7 @@ public abstract class serverAbstractSwitch implements serverSwitch { setConfig(threadName + "_memprereq", initialMemoryPreRequisite); } newThread.setLog(log); - newThread.setDescription(threadShortDescription, threadLongDescription); + newThread.setDescription(threadShortDescription, threadLongDescription, threadMonitorURL); workerThreads.put(threadName, newThread); // start the thread if (workerThreads.containsKey(threadName)) newThread.start(); diff --git a/source/de/anomic/server/serverAbstractThread.java b/source/de/anomic/server/serverAbstractThread.java index 9699f3386..d9235cbad 100644 --- a/source/de/anomic/server/serverAbstractThread.java +++ b/source/de/anomic/server/serverAbstractThread.java @@ -59,6 +59,7 @@ public abstract class serverAbstractThread extends Thread implements serverThrea private serverLog log = null; private long idletime = 0, busytime = 0, memprereq = 0; private String shortDescr = "", longDescr = ""; + private String monitorURL = null; private long threadBlockTimestamp = System.currentTimeMillis(); private long idleCycles = 0, busyCycles = 0, outofmemoryCycles = 0; private Object syncObject = null; @@ -87,10 +88,11 @@ public abstract class serverAbstractThread extends Thread implements serverThrea this.idletime += millis; } - public final void setDescription(String shortText, String longText) { + public final void setDescription(String shortText, String longText, String monitorURL) { // sets a visible description string this.shortDescr = shortText; this.longDescr = longText; + this.monitorURL = monitorURL; } public final void setStartupSleep(long milliseconds) { @@ -121,6 +123,10 @@ public abstract class serverAbstractThread extends Thread implements serverThrea return this.longDescr; } + public String getMonitorURL() { + return this.monitorURL; + } + public final long getIdleCycles() { // returns the total number of cycles of job execution with idle-result 
return this.idleCycles; diff --git a/source/de/anomic/server/serverPortForwardingSch.java b/source/de/anomic/server/serverPortForwardingSch.java index 6cd1b0cdb..6aba86044 100644 --- a/source/de/anomic/server/serverPortForwardingSch.java +++ b/source/de/anomic/server/serverPortForwardingSch.java @@ -138,7 +138,7 @@ public class serverPortForwardingSch implements serverPortForwarding{ // using a timer task to control if the session remains open if (sessionWatcher == null) { this.log.logDebug("Deploying port forwarding session watcher thread."); - this.switchboard.deployThread("portForwardingWatcher", "Remote Port Forwarding Watcher", "this thread is used to detect broken connections and to re-establish it if necessary.", + this.switchboard.deployThread("portForwardingWatcher", "Remote Port Forwarding Watcher", "this thread is used to detect broken connections and to re-establish it if necessary.", null, sessionWatcher = new serverInstantThread(this, "reconnect", null), 30000,30000,30000,1000); sessionWatcher.setSyncObject(new Object()); } diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index 3d2bf9e29..8a24eba02 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -71,6 +71,7 @@ public interface serverSwitch { public void deployThread(String threadName, String threadShortDescription, String threadLongDescription, + String threadMonitorURL, serverThread newThread, long startupDelay, long initialIdleSleep, long initialBusySleep, diff --git a/source/de/anomic/server/serverThread.java b/source/de/anomic/server/serverThread.java index dd5921426..a02692f85 100644 --- a/source/de/anomic/server/serverThread.java +++ b/source/de/anomic/server/serverThread.java @@ -53,7 +53,7 @@ public interface serverThread { // these method are implemented by serverThread and do not need to be altered // this includes also the run()-Method - public void setDescription(String shortText, 
String longText); + public void setDescription(String shortText, String longText, String monitorURL); // sets a visible description string public void setStartupSleep(long milliseconds); @@ -74,6 +74,9 @@ public interface serverThread { public String getLongDescription(); // returns long description string for online display + public String getMonitorURL(); + // returns a URL that can be used to monitor the thread and its queue + public long getIdleCycles(); // returns the total number of cycles of job execution with idle-result diff --git a/source/yacy.java b/source/yacy.java index d2b52aeeb..5b78ec859 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -300,7 +300,7 @@ public final class yacy { serverLog.logFailure("STARTUP", "Failed to start server. Probably port " + port + " already in use."); } else { // first start the server - sb.deployThread("10_httpd", "HTTPD Server/Proxy", "the HTTPD, used as web server and proxy", server, 0, 0, 0, 0); + sb.deployThread("10_httpd", "HTTPD Server/Proxy", "the HTTPD, used as web server and proxy", null, server, 0, 0, 0, 0); //server.start(); // open the browser window