diff --git a/htroot/Network.html b/htroot/Network.html index 75c1bb988..4a50d7c30 100644 --- a/htroot/Network.html +++ b/htroot/Network.html @@ -58,6 +58,7 @@ Sent
URLs
  Received
Words
  Received
URLs
  +Indexed Pages
per Minute
  #Seeds
  #Connects
per hour
  #{list}# @@ -83,6 +84,7 @@ #[sU]# #[rI]# #[rU]# +#[ppm]# #[seeds]# #[connects]# @@ -139,6 +141,7 @@ Sent
URLs Received
Words Received
URLs +PPM #Seeds #Connects
per hour @@ -155,6 +158,7 @@ #[my-sU]# #[my-rI]# #[my-rU]# +#[my-ppm]# #[my-seeds]# #[my-connects]# diff --git a/htroot/Network.java b/htroot/Network.java index 331530dd2..6f0903f1d 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -113,6 +113,7 @@ public class Network { prop.put("table_my-sU", seed.get("sU", "-")); prop.put("table_my-rI", seed.get("rI", "-")); prop.put("table_my-rU", seed.get("rU", "-")); + prop.put("table_my-ppm", seed.get("ISpeed", "-")); prop.put("table_my-seeds", seed.get("SCount", "-")); prop.put("table_my-connects", seed.get("CCount", "-")); } @@ -262,6 +263,7 @@ public class Network { prop.put("table_list_"+conCount+"_sU", seed.get("sU", "-")); prop.put("table_list_"+conCount+"_rI", seed.get("rI", "-")); prop.put("table_list_"+conCount+"_rU", seed.get("rU", "-")); + prop.put("table_list_"+conCount+"_ppm", seed.get("ISpeed", "-")); prop.put("table_list_"+conCount+"_seeds", seed.get("SCount", "-")); prop.put("table_list_"+conCount+"_connects", seed.get("CCount", "-")); conCount++; diff --git a/htroot/Status.html b/htroot/Status.html index 2833954bf..b9b8dc022 100644 --- a/htroot/Status.html +++ b/htroot/Status.html @@ -57,7 +57,7 @@ see the installatio #(peerStatistics)# Unknown :: -Uptime = #[uptime]#, Links# = #[links]#, RWIs# = #[words]#, +Uptime = #[uptime]#, Links# = #[links]#, RWIs# = #[words]#, PPM = #[pagesperminute]#, Connects (#[juniorConnects]#|#[seniorConnects]#|#[principalConnects]#|#[disconnects]#) #[connects]# peers/hour #(/peerStatistics)# diff --git a/htroot/Status.java b/htroot/Status.java index 7c6384887..a3a691b53 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -54,6 +54,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.serverDate; import de.anomic.yacy.yacyCore; +import de.anomic.plasma.plasmaSwitchboard; public class Status { @@ -117,8 +118,10 @@ public class Status { prop.put("peerAddress", 0);//not assigned prop.put("peerStatistics", 0);//unknown } else { + long uptime = 60000 * Long.parseLong(yacyCore.seedDB.mySeed.get("Uptime", "0")); prop.put("peerStatistics", 1); - prop.put("peerStatistics_uptime", serverDate.intervalToString(60000 * Long.parseLong(yacyCore.seedDB.mySeed.get("Uptime", "0")))); + prop.put("peerStatistics_uptime", serverDate.intervalToString(uptime)); + prop.put("peerStatistics_pagesperminute", yacyCore.seedDB.mySeed.get("ISpeed", "unknown")); prop.put("peerStatistics_links", yacyCore.seedDB.mySeed.get("LCount", "unknown")); prop.put("peerStatistics_words", yacyCore.seedDB.mySeed.get("ICount", "unknown")); prop.put("peerStatistics_juniorConnects", yacyCore.peerActions.juniorConnects); diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 3ee22fbc2..852850ff7 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -191,21 +191,24 @@ public class plasmaCrawlNURL extends plasmaURL { String profile, int depth, int anchors, int forkfactor, int stackMode) { Entry e = new Entry(initiator, url, referrer, name, loaddate, profile, depth, anchors, forkfactor); - try { - switch (stackMode) { - case STACK_TYPE_CORE: coreStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_LIMIT: limitStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_OVERHANG: overhangStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_REMOTE: remoteStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_IMAGE: imageStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_MOVIE: movieStack.push(new byte[][] {e.hash.getBytes()}); break; - case STACK_TYPE_MUSIC: musicStack.push(new byte[][] {e.hash.getBytes()}); break; + push(stackMode, e.hash); + return e; + } + + private void push(int stackType, String hash) { + try { + switch (stackType) { + case STACK_TYPE_CORE: coreStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_LIMIT: limitStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_OVERHANG: overhangStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_REMOTE: remoteStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_IMAGE: imageStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_MOVIE: movieStack.push(new byte[][] {hash.getBytes()}); break; + case STACK_TYPE_MUSIC: musicStack.push(new byte[][] {hash.getBytes()}); break; default: break; } - stackIndex.add(new String(e.hash.getBytes())); - } catch (IOException er) { - } - return e; + stackIndex.add(hash); + } catch (IOException er) {} } public Entry[] top(int stackType, int count) { @@ -234,6 +237,19 @@ public class plasmaCrawlNURL extends plasmaURL { } } + public void shift(int fromStack, int toStack) throws IOException { + switch (fromStack) { + case STACK_TYPE_CORE: push(toStack, new String(coreStack.pop()[0])); return; + case STACK_TYPE_LIMIT: push(toStack, new String(limitStack.pop()[0])); return; + case STACK_TYPE_OVERHANG: push(toStack, new String(overhangStack.pop()[0])); return; + case STACK_TYPE_REMOTE: push(toStack, new String(remoteStack.pop()[0])); return; + case STACK_TYPE_IMAGE: push(toStack, new String(imageStack.pop()[0])); return; + case STACK_TYPE_MOVIE: push(toStack, new String(movieStack.pop()[0])); return; + case STACK_TYPE_MUSIC: push(toStack, new String(musicStack.pop()[0])); return; + default: return; + } + } + private Entry pop(kelondroStack stack) { // this is a filo - pop try { diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index ffca5e78a..f0fb7d2cb 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -205,7 +205,7 @@ public final class plasmaHTCache { File f; while (currCacheSize > maxCacheSize) { f = (File) cacheAge.remove(cacheAge.firstKey()); - if (f.exists()) { + if ((f != null) && (f.exists())) { currCacheSize -= f.length(); if (f.delete()) { log.logInfo("DELETED OLD CACHE : " + f.toString()); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c1e0292ab..9a1800433 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -711,6 +711,20 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //log.logDebug("LimitCrawl: queue is empty"); return false; } + + if ((coreCrawlJobSize() == 0) && (limitCrawlTriggerJobSize() > 100)) { + // it is not efficient if the core crawl job is empty and we have too much to do + // move some tasks to the core crawl job + int toshift = limitCrawlTriggerJobSize() / 10; + if (toshift > 1000) toshift = 1000; + try { + for (int i = 0; i < toshift; i++) { + urlPool.noticeURL.shift(plasmaCrawlNURL.STACK_TYPE_LIMIT, plasmaCrawlNURL.STACK_TYPE_CORE); + } + log.logInfo("shifted " + toshift + " jobs from global crawl to local crawl"); + } catch (IOException e) {} + } + // if the server is busy, we do crawling more slowly //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} diff --git a/source/de/anomic/yacy/yacyCore.java b/source/de/anomic/yacy/yacyCore.java index cda365d51..fab9b6dd2 100644 --- a/source/de/anomic/yacy/yacyCore.java +++ b/source/de/anomic/yacy/yacyCore.java @@ -383,7 +383,7 @@ public class yacyCore { // holding a reference to all started threads int contactedSeedCount = 0; - List syncList = Collections.synchronizedList(new LinkedList()); // memory for threads + List syncList = Collections.synchronizedList(new LinkedList()); // memory for threads serverSemaphore sync = new serverSemaphore(attempts); // going through the peer list and starting a new publisher thread for each peer diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index a94150191..2ffbb0089 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -99,8 +99,9 @@ public class yacyPeerActions { } else { seedDB.mySeed.put("Port", sb.getConfig("port", "8080")); } - seedDB.mySeed.put("ISpeed", "unknown"); // the speed of indexing (words/minute) of the peer long uptime = ((yacyCore.universalTime() - Long.parseLong(sb.getConfig("startupTime", "0"))) / 1000) / 60; + long indexedc = sb.getThread("80_indexing").getBusyCycles(); + seedDB.mySeed.put("ISpeed", ((indexedc == 0) || (uptime == 0)) ? "unknown" : ("" + (indexedc / uptime))); // the speed of indexing (pages/minute) of the peer seedDB.mySeed.put("Uptime", "" + uptime); // the number of minutes that the peer is up in minutes/day (moving average MA30) seedDB.mySeed.put("LCount", "" + sb.urlPool.loadedURL.size()); // the number of links that the peer has stored (LURL's) seedDB.mySeed.put("NCount", "" + sb.urlPool.noticeURL.stackSize()); // the number of links that the peer has noticed, but not loaded (NURL's) diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index ee4c58150..ea7b33a40 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -106,7 +106,7 @@ public class yacySeed { dna.put("Name", "∅"); // the name that the peer has given itself dna.put("BDate", "∅"); // birthdate - first startup // later during operation - - dna.put("ISpeed", "0"); // the speed of indexing (words/minute) of the peer + dna.put("ISpeed", "0"); // the speed of indexing (pages/minute) of the peer dna.put("Uptime", "0"); // the number of minutes that the peer is up in minutes/day (moving average MA30) dna.put("LCount", "0"); // the number of links that the peer has stored (LURL's) dna.put("NCount", "0"); // the number of links that the peer has noticed, but not loaded (NURL's)