diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html
index 59cd6f976..e4561449d 100644
--- a/htroot/IndexCreate_p.html
+++ b/htroot/IndexCreate_p.html
@@ -165,8 +165,14 @@ Error with file input "#[crawlingStart]#": #[error]#
Set new prefetch depth to "#[newproxyPrefetchDepth]#"
::
Crawling of "#[crawlingURL]#" started.
-You can monitor the crawling progress with this page.
-Please wait some seconds before refresh of this page, because the request is enqueued and delayed until the http server is idle for a certain time.
+You can monitor the crawling progress either by watching the URL queues
+(local queue,
+global queue,
+loader queue,
+indexing queue)
+or by checking the fill/process count of all queues on the
+performance page.
+Please wait a few seconds, because the request is enqueued and delayed until the HTTP server has been idle for a certain time.
The indexing result is presented on the
Index Monitor-page.
It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability.
diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html
index 47826943f..441baf827 100644
--- a/htroot/Performance_p.html
+++ b/htroot/Performance_p.html
@@ -54,7 +54,7 @@
-
+
Changes take effect immediately
diff --git a/htroot/env/grafics/notifier.gif b/htroot/env/grafics/notifier.gif
index 761a15a99..330c7e881 100644
Binary files a/htroot/env/grafics/notifier.gif and b/htroot/env/grafics/notifier.gif differ
diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index 4a0a5595e..16dbe4fb6 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -272,6 +272,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
+ switchboard.proxyLastAccess = System.currentTimeMillis();
// using an ByteCount OutputStream to count the send bytes (needed for the logfile)
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
@@ -786,6 +787,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String args = conProp.getProperty("ARGS"); // may be null if no args were given
String httpVer = conProp.getProperty(httpd.CONNECTION_PROP_HTTP_VER);
+ switchboard.proxyLastAccess = System.currentTimeMillis();
+
int port;
int pos;
if ((pos = host.indexOf(":")) < 0) {
@@ -866,6 +869,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// remembering the starting time of the request
Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpd.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
+ switchboard.proxyLastAccess = System.currentTimeMillis();
// using an ByteCount OutputStream to count the send bytes
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpd.CONNECTION_PROP_REQUESTLINE).length() + 2);
@@ -953,6 +957,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
this.connectionProperties = conProp;
+ switchboard.proxyLastAccess = System.currentTimeMillis();
String host = conProp.getProperty("HOST");
int port = Integer.parseInt(conProp.getProperty("PORT"));
diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java
index f0fb7d2cb..2c2a38b95 100644
--- a/source/de/anomic/plasma/plasmaHTCache.java
+++ b/source/de/anomic/plasma/plasmaHTCache.java
@@ -74,7 +74,6 @@ import de.anomic.tools.enumerateFiles;
public final class plasmaHTCache {
private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
- private static final long idleDelay = 2000; // 2 seconds no hits until we think that we idle
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
private kelondroMap responseHeaderDB = null;
@@ -82,20 +81,9 @@ public final class plasmaHTCache {
private final TreeMap cacheAge; // a - relation
public long currCacheSize;
public long maxCacheSize;
- private long lastAcc;
public final File cachePath;
public static serverLog log;
- /*
- public static final int CACHE_UNFILLED = 0; // default case without assignment
- public static final int CACHE_FILL = 1; // this means: update == true
- public static final int CACHE_HIT = 2; // the best case: reading from Cache
- public static final int CACHE_STALE_NO_RELOAD = 3; // this shall be treated as a rare case that should not appear
- public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true
- public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content
- public static final int CACHE_PASSING = 6; // does not touch cache, just passing
- */
-
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
//this.switchboard = switchboard;
@@ -129,10 +117,7 @@ public final class plasmaHTCache {
// init stack
cacheStack = new LinkedList();
- // init idle check
- lastAcc = System.currentTimeMillis();
-
- // init cache age and size management
+ // init cache age and size management
cacheAge = new TreeMap();
currCacheSize = 0;
this.maxCacheSize = maxCacheSize;
@@ -299,10 +284,6 @@ public final class plasmaHTCache {
return new httpHeader(null, hdb);
}
- public boolean idle() {
- return (System.currentTimeMillis() > (idleDelay + lastAcc));
- }
-
public boolean full() {
return (cacheStack.size() > stackLimit);
}
@@ -415,20 +396,6 @@ public final class plasmaHTCache {
}
}
- /*
- public void saveResource(URL url, byte[] resource) {
- File f = getCachePath(url);
- f.getParentFile().mkdirs();
- FileOutputStream fos = null;
- try {
- fos = new FileOutputStream(f);
- htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
- } finally {
- if (fos!=null)try{fos.close();}catch(Exception e){}
- }
- }
- */
-
public static boolean isPOST(String urlString) {
return ((urlString.indexOf("?") >= 0) ||
(urlString.indexOf("&") >= 0));
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 267017125..21165146e 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -180,6 +180,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public kelondroTables facilityDB;
public plasmaParser parser;
public plasmaWordIndexClassicCacheMigration classicCache;
+ public long proxyLastAccess;
private serverSemaphore shutdownSync = new serverSemaphore(0);
private boolean terminate = false;
@@ -209,7 +210,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
remoteProxyHost = null;
remoteProxyPort = 0;
}
-
+ proxyLastAccess = 0;
if (!(listsPath.exists())) listsPath.mkdirs();
@@ -404,6 +405,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logSystem("Finished Switchboard Initialization");
}
+ public boolean onlineCaution() { // reports whether background processing should currently be held back
+ return System.currentTimeMillis() - proxyLastAccess < 30000; // true while fewer than 30000 ms have passed since proxyLastAccess was last stamped
+ }
+
private static String ppRamString(int bytes) {
if (bytes < 1024) return bytes + " KByte";
bytes = bytes / 1024;
@@ -557,7 +562,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public boolean deQueue() {
// work off fresh entries from the proxy or from the crawler
-
+ if (onlineCaution()) {
+ log.logDebug("deQueue: online caution, omitting resource stack processing");
+ return false;
+ }
plasmaSwitchboardQueue.Entry nextentry;
synchronized (sbQueue) {
if (sbQueue.size() == 0) {
@@ -565,12 +573,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
return false; // nothing to do
}
- // in case that the server is very busy we do not work off the queue too fast
- if (!(cacheManager.idle())) try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {}
-
// do one processing step
- log.logDebug("DEQUEUE: cacheManager=" + ((cacheManager.idle()) ? "idle" : "busy") +
- ", sbQueueSize=" + sbQueue.size() +
+ log.logDebug("DEQUEUE: sbQueueSize=" + sbQueue.size() +
", coreStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) +
", limitStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) +
", overhangStackSize=" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) +
@@ -666,7 +670,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
-
+ if (onlineCaution()) {
+ log.logDebug("CoreCrawl: online caution, omitting processing");
+ return false;
+ }
// if the server is busy, we do crawling more slowly
//if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
@@ -797,21 +804,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
//log.logDebug("GlobalCrawl: queue is empty");
return false;
}
- /*
- if (queueStack.size() > 0) {
- log.logDebug("GlobalCrawl: any processe is in queue, dismissed (" +
- "processStack=" + queueStack.size() + ")");
- return false;
- }
- if (noticeURL.coreStackSize() > 0) {
- log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
- "coreStackSize=" + noticeURL.coreStackSize() + ")");
+ if (onlineCaution()) {
+ log.logDebug("GlobalCrawl: online caution, omitting processing");
return false;
}
- */
-
- // if the server is busy, we do this more slowly
- //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
// if crawling was paused we have to wait until we wer notified to continue
synchronized(this.crawlingPausedSync) {
diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
index 2a4fcd26b..35fed67a5 100644
--- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
+++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java
@@ -125,18 +125,17 @@ public final class plasmaWordIndexAssortmentCluster {
return;
}
- // calculate appropriate cluster insert point
- int clusterStart = clusterCount;
- if ((((byte) wordHash.charAt(0)) & 1) == 1) {
- // for every second hash, place the entries in the middle of the assortments
- // this balances the entries within the assortments-cluster
- int cap = clusterCapacity - newContainer.size() - 2 * clusterCount;
- while (cap > 0) {
- cap -= clusterStart;
- clusterStart--;
- }
+ // calculate minimum cluster insert point
+ int clusterMinStart = clusterCount;
+ int cap = clusterCapacity - newContainer.size() - 2 * clusterCount;
+ while (cap > 0) {
+ cap -= clusterMinStart;
+ clusterMinStart--;
}
+ // choose the actual cluster insert point at random between the minimum (exclusive) and the maximum clusterCount (inclusive)
+ int clusterStart = clusterCount - (int) (Math.random() * (clusterCount - clusterMinStart));
+
// do the insert
plasmaWordIndexEntryContainer c;
Iterator i = newContainer.entries();