From c64970fa47778d0d4b980c1a2750b770a53f4970 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 21 Jul 2005 11:17:04 +0000 Subject: [PATCH] re-implemented proxy-busy-check and fixed some other things git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@421 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreate_p.html | 10 ++++- htroot/Performance_p.html | 2 +- htroot/env/grafics/notifier.gif | Bin 836 -> 824 bytes source/de/anomic/http/httpdProxyHandler.java | 5 +++ source/de/anomic/plasma/plasmaHTCache.java | 35 +--------------- .../de/anomic/plasma/plasmaSwitchboard.java | 38 ++++++++---------- .../plasmaWordIndexAssortmentCluster.java | 19 +++++---- 7 files changed, 41 insertions(+), 68 deletions(-) diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html index 59cd6f976..e4561449d 100644 --- a/htroot/IndexCreate_p.html +++ b/htroot/IndexCreate_p.html @@ -165,8 +165,14 @@ Error with file input "#[crawlingStart]#": #[error]# Set new prefetch depth to "#[newproxyPrefetchDepth]#" :: Crawling of "#[crawlingURL]#" started. -You can monitor the crawling progress with this page. -Please wait some seconds before refresh of this page, because the request is enqueued and delayed until the http server is idle for a certain time. +You can monitor the crawling progress either by watching the URL queues +(local queue, +global queue, +loader queue, +indexing queue) +or see the fill/process count of all queues on the +performance page. +Please wait some seconds, because the request is enqueued and delayed until the http server is idle for a certain time. The indexing result is presented on the Index Monitor-page. It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability. diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html index 47826943f..441baf827 100644 --- a/htroot/Performance_p.html +++ b/htroot/Performance_p.html @@ -54,7 +54,7 @@     -    +    Changes take effect immediately diff --git a/htroot/env/grafics/notifier.gif b/htroot/env/grafics/notifier.gif index 761a15a990c966c0658b4dbec7ac3947f55be9a0..330c7e881e24f150e542d2e7ab11996afad442d1 100644 GIT binary patch delta 32 ocmX@Ywu5bh0<(lD10#ow$A$$5n>mEFVoq#Wc(|Q`iHX4)0HFK{%>V!Z delta 44 zcmdnNc7$z%0<(%D10#ow$Akxqo7uU!TvlvIbZp_2^_t@$_(-``Oxj3BLV&><04BT) AQvd(} diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java index 4a0a5595e..16dbe4fb6 100644 --- a/source/de/anomic/http/httpdProxyHandler.java +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -272,6 +272,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt Date requestDate = new Date(); // remember the time... this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime())); if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction(); + switchboard.proxyLastAccess = System.currentTimeMillis(); // using an ByteCount OutputStream to count the send bytes (needed for the logfile) respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2); @@ -786,6 +787,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt String args = conProp.getProperty("ARGS"); // may be null if no args were given String httpVer = conProp.getProperty(httpd.CONNECTION_PROP_HTTP_VER); + switchboard.proxyLastAccess = System.currentTimeMillis(); + int port; int pos; if ((pos = host.indexOf(":")) < 0) { @@ -866,6 +869,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt // remembering the starting time of the request Date requestDate = new Date(); // remember the time... this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime())); + switchboard.proxyLastAccess = System.currentTimeMillis(); // using an ByteCount OutputStream to count the send bytes respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2); @@ -953,6 +957,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException { this.connectionProperties = conProp; + switchboard.proxyLastAccess = System.currentTimeMillis(); String host = conProp.getProperty("HOST"); int port = Integer.parseInt(conProp.getProperty("PORT")); diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index f0fb7d2cb..2c2a38b95 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -74,7 +74,6 @@ import de.anomic.tools.enumerateFiles; public final class plasmaHTCache { private static final int stackLimit = 150; // if we exceed that limit, we do not check idle - private static final long idleDelay = 2000; // 2 seconds no hits until we think that we idle public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day private kelondroMap responseHeaderDB = null; @@ -82,20 +81,9 @@ public final class plasmaHTCache { private final TreeMap cacheAge; // a - relation public long currCacheSize; public long maxCacheSize; - private long lastAcc; public final File cachePath; public static serverLog log; - /* - public static final int CACHE_UNFILLED = 0; // default case without assignment - public static final int CACHE_FILL = 1; // this means: update == true - public static final int CACHE_HIT = 2; // the best case: reading from Cache - public static final int CACHE_STALE_NO_RELOAD = 3; // this shall be treated as a rare case that should not appear - public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true - public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content - public static final int CACHE_PASSING = 6; // does not touch cache, just passing - */ - public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) { //this.switchboard = switchboard; @@ -129,10 +117,7 @@ public final class plasmaHTCache { // init stack cacheStack = new LinkedList(); - // init idle check - lastAcc = System.currentTimeMillis(); - - // init cache age and size management + // init cache age and size management cacheAge = new TreeMap(); currCacheSize = 0; this.maxCacheSize = maxCacheSize; @@ -299,10 +284,6 @@ public final class plasmaHTCache { return new httpHeader(null, hdb); } - public boolean idle() { - return (System.currentTimeMillis() > (idleDelay + lastAcc)); - } - public boolean full() { return (cacheStack.size() > stackLimit); } @@ -415,20 +396,6 @@ public final class plasmaHTCache { } } - /* - public void saveResource(URL url, byte[] resource) { - File f = getCachePath(url); - f.getParentFile().mkdirs(); - FileOutputStream fos = null; - try { - fos = new FileOutputStream(f); - htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file - } finally { - if (fos!=null)try{fos.close();}catch(Exception e){} - } - } - */ - public static boolean isPOST(String urlString) { return ((urlString.indexOf("?") >= 0) || (urlString.indexOf("&") >= 0)); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 267017125..21165146e 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -180,6 +180,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public kelondroTables facilityDB; public plasmaParser parser; public plasmaWordIndexClassicCacheMigration classicCache; + public long proxyLastAccess; private serverSemaphore shutdownSync = new serverSemaphore(0); private boolean terminate = false; @@ -209,7 +210,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser remoteProxyHost = null; remoteProxyPort = 0; } - + proxyLastAccess = 0; if (!(listsPath.exists())) listsPath.mkdirs(); @@ -404,6 +405,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser log.logSystem("Finished Switchboard Initialization"); } + public boolean onlineCaution() { + return System.currentTimeMillis() - proxyLastAccess < 30000; + } + private static String ppRamString(int bytes) { if (bytes < 1024) return bytes + " KByte"; bytes = bytes / 1024; @@ -557,7 +562,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public boolean deQueue() { // work off fresh entries from the proxy or from the crawler - + if (onlineCaution()) { + log.logDebug("deQueue: online caution, omitting resource stack processing"); + return false; + } plasmaSwitchboardQueue.Entry nextentry; synchronized (sbQueue) { if (sbQueue.size() == 0) { @@ -565,12 +573,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return false; // nothing to do } - // in case that the server is very busy we do not work off the queue too fast - if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {} - // do one processing step - log.logDebug("DEQUEUE: cacheManager=" + ((cacheManager.idle()) ? "idle" : "busy") + - ", sbQueueSize=" + sbQueue.size() + + log.logDebug("DEQUEUE: sbQueueSize=" + sbQueue.size() + ", coreStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", limitStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", overhangStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + @@ -666,7 +670,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser "cacheLoader=" + cacheLoader.size() + ")"); return false; } - + if (onlineCaution()) { + log.logDebug("CoreCrawl: online caution, omitting processing"); + return false; + } // if the server is busy, we do crawling more slowly //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} @@ -797,21 +804,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //log.logDebug("GlobalCrawl: queue is empty"); return false; } - /* - if (queueStack.size() > 0) { - log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" + - "processStack=" + queueStack.size() + ")"); - return false; - } - if (noticeURL.coreStackSize() > 0) { - log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" + - "coreStackSize=" + noticeURL.coreStackSize() + ")"); + if (onlineCaution()) { + log.logDebug("GlobalCrawl: online caution, omitting processing"); return false; } - */ - - // if the server is busy, we do this more slowly - //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} // if crawling was paused we have to wait until we wer notified to continue synchronized(this.crawlingPausedSync) { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 2a4fcd26b..35fed67a5 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -125,18 +125,17 @@ public final class plasmaWordIndexAssortmentCluster { return; } - // calculate appropriate cluster insert point - int clusterStart = clusterCount; - if ((((byte) wordHash.charAt(0)) & 1) == 1) { - // for every second hash, place the entries in the middle of the assortments - // this balances the entries within the assortments-cluster - int cap = clusterCapacity - newContainer.size() - 2 * clusterCount; - while (cap > 0) { - cap -= clusterStart; - clusterStart--; - } + // calculate minimum cluster insert point + int clusterMinStart = clusterCount; + int cap = clusterCapacity - newContainer.size() - 2 * clusterCount; + while (cap > 0) { + cap -= clusterMinStart; + clusterMinStart--; } + // point the real cluster insert point somewhere between the minimum and the maximum + int clusterStart = clusterCount - (int) (Math.random() * (clusterCount - clusterMinStart)); + // do the insert plasmaWordIndexEntryContainer c; Iterator i = newContainer.entries();