@@ -126,7 +126,7 @@ cache will speed up crawls with a depth > 3.
#[slmedHTTP]#
#[sllowHTTP]#
#[usedHTTP]#
-
+
#[dfltHTTP]#
#[goodHTTP]#
#[bestHTTP]#
@@ -144,7 +144,7 @@ Increasing this cache will be most important for a fast proxy mode.
#[slmedLURL]#
#[sllowLURL]#
#[usedLURL]#
-
+
#[dfltLURL]#
#[goodLURL]#
#[bestLURL]#
@@ -161,7 +161,7 @@ This cache is very important for a fast search process. Increasing the cache siz
#[slmedNURL]#
#[sllowNURL]#
#[usedNURL]#
-
+
#[dfltNURL]#
#[goodNURL]#
#[bestNURL]#
@@ -178,7 +178,7 @@ Increasing the cache size will result in faster double-check during URL recognit
#[slmedEURL]#
#[sllowEURL]#
#[usedEURL]#
-
+
#[dfltEURL]#
#[goodEURL]#
#[bestEURL]#
@@ -195,7 +195,7 @@ Increasing the cache size will most probably speed up crawling slightly, but not
#[slmedDHT]#
#[sllowDHT]#
#[usedDHT]#
-
+
#[dfltDHT]#
#[goodDHT]#
#[bestDHT]#
@@ -213,7 +213,7 @@ Increasing this cache may speed up many functions, but we need to test this to s
#[slmedMessage]#
#[sllowMessage]#
#[usedMessage]#
-
+
#[dfltMessage]#
#[goodMessage]#
#[bestMessage]#
@@ -229,7 +229,7 @@ Increasing this cache may speed up many functions, but we need to test this to s
#[slmedWiki]#
#[sllowWiki]#
#[usedWiki]#
-
+
#[dfltWiki]#
#[goodWiki]#
#[bestWiki]#
@@ -247,7 +247,7 @@ Increasing this cache may speed up access to the wiki pages.
#[slmedNews]#
#[sllowNews]#
#[usedNews]#
-
+
#[dfltNews]#
#[goodNews]#
#[bestNews]#
@@ -256,7 +256,7 @@ Increasing this cache may speed up the peer-ping.
-Robots.txt DB
+robots.txt DB
#[chunkRobots]#
#[slreqRobots]#
#[slempRobots]#
@@ -264,14 +264,31 @@ Increasing this cache may speed up the peer-ping.
#[slmedRobots]#
#[sllowRobots]#
#[usedRobots]#
-
+
#[dfltRobots]#
#[goodRobots]#
#[bestRobots]#
-The Robots.txt DB stores downloaded records from robots.txt files.
+The robots.txt DB stores downloaded records from robots.txt files.
Increasing this cache may speed up the check whether crawling of a URL is allowed.
+
+Crawl Profiles
+#[chunkProfiles]#
+#[slreqProfiles]#
+#[slempProfiles]#
+#[slhigProfiles]#
+#[slmedProfiles]#
+#[sllowProfiles]#
+#[usedProfiles]#
+
+#[dfltProfiles]#
+#[goodProfiles]#
+#[bestProfiles]#
+The profile database stores properties for each crawl that is started on the local peer.
+Increasing this cache may speed up crawling, but little space is needed.
+
+
Totals
#[usedTotal]# MB
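
For orientation (not part of the patch): every #[name]# token above is a template placeholder that the PerformanceMemory_p servlet fills via prop.put(...), as the hunks below do for the totals. A minimal sketch of filling the new Profiles fields, with made-up illustrative values:

    serverObjects prop = new serverObjects();  // de.anomic.server.serverObjects
    prop.put("slreqProfiles", "10");  // records currently stored in the profiles DB
    prop.put("usedProfiles", "8");    // cache memory currently in use, in KB
    prop.put("dfltProfiles", "8");    // default from yacy.init (8192 bytes = 8 KB)
    prop.put("goodProfiles", "8");    // suggested value for the "good" preset
    prop.put("bestProfiles", "8");    // suggested value for the "best" preset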
diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java
index a17f14577..394831ec7 100644
--- a/htroot/PerformanceMemory_p.java
+++ b/htroot/PerformanceMemory_p.java
@@ -85,6 +85,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheWiki", Long.parseLong(post.get("ramCacheWiki", "0")) * KB);
env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB);
env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB);
+ env.setConfig("ramCacheProfiles", Long.parseLong(post.get("ramCacheProfiles", "0")) * KB);
}
if (post.containsKey("setDefault")) {
env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI")));
@@ -97,6 +98,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheWiki", Long.parseLong((String) defaultSettings.get("ramCacheWiki")));
env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews")));
env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots")));
+ env.setConfig("ramCacheProfiles", Long.parseLong((String) defaultSettings.get("ramCacheProfiles")));
}
if (post.containsKey("setGood")) set = "setGood";
if (post.containsKey("setBest")) set = "setBest";
@@ -196,6 +198,11 @@ public class PerformanceMemory_p {
slt = sb.robots.dbCacheFillStatus();
putprop(prop, env, "Robots", set);
+ req = sb.profiles.size();
+ chk = sb.profiles.dbCacheChunkSize();
+ slt = sb.profiles.dbCacheFillStatus();
+ putprop(prop, env, "Profiles", set);
+
prop.put("usedTotal", usedTotal / MB);
prop.put("currTotal", currTotal / MB);
prop.put("dfltTotal", dfltTotal / MB);
diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java
index f0c41eef3..accc66d02 100644
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@@ -58,17 +58,27 @@ public class plasmaCrawlProfile {
private kelondroMap profileTable;
private File profileTableFile;
+ private int bufferkb;
- public plasmaCrawlProfile(File profileTableFile) throws IOException {
+    public plasmaCrawlProfile(File profileTableFile, int bufferkb) throws IOException {
        this.profileTableFile = profileTableFile;
+        this.bufferkb = bufferkb;
if (profileTableFile.exists()) {
- profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000));
+ profileTable = new kelondroMap(new kelondroDyn(profileTableFile, bufferkb * 1024));
} else {
profileTableFile.getParentFile().mkdirs();
- profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000, plasmaURL.urlCrawlProfileHandleLength, 2000));
+ profileTable = new kelondroMap(new kelondroDyn(profileTableFile, bufferkb * 1024, plasmaURL.urlCrawlProfileHandleLength, 2000));
}
}
+ public int[] dbCacheChunkSize() {
+ return profileTable.cacheChunkSize();
+ }
+
+ public int[] dbCacheFillStatus() {
+ return profileTable.cacheFillStatus();
+ }
+
private void resetDatabase() {
// deletes the profile database and creates a new one
if (profileTable != null) try {
@@ -77,7 +86,7 @@ public class plasmaCrawlProfile {
if (!(profileTableFile.delete())) throw new RuntimeException("cannot delete crawl profile database");
try {
profileTableFile.getParentFile().mkdirs();
- profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000, plasmaURL.urlCrawlProfileHandleLength, 2000));
+ profileTable = new kelondroMap(new kelondroDyn(profileTableFile, bufferkb * 1024, plasmaURL.urlCrawlProfileHandleLength, 2000));
} catch (IOException e){
serverLog.logSevere("PLASMA", "plasmaCrawlProfile.resetDatabase", e);
}
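
A minimal usage sketch of the changed constructor and the two new monitoring accessors, wired up the way plasmaSwitchboard does below; the 8 KB buffer matches the ramCacheProfiles default added to yacy.init:

    File profilesFile = new File(plasmaPath, "crawlProfiles0.db");
    plasmaCrawlProfile profiles = new plasmaCrawlProfile(profilesFile, 8); // 8 KB node cache
    int[] chunk = profiles.dbCacheChunkSize();  // per-cache chunk sizes, from kelondroMap
    int[] fill  = profiles.dbCacheFillStatus(); // fill levels, displayed on PerformanceMemory_p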
diff --git a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
index 6134c1100..1cfc8ebd3 100644
--- a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
+++ b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java
@@ -63,9 +63,11 @@ import de.anomic.server.logging.serverLog;
public class plasmaCrawlRobotsTxt {
private kelondroMap robotsTable;
private File robotsTableFile;
+ private int bufferkb;
public plasmaCrawlRobotsTxt(File robotsTableFile, int bufferkb) throws IOException {
this.robotsTableFile = robotsTableFile;
+ this.bufferkb = bufferkb;
if (robotsTableFile.exists()) {
try {
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024));
@@ -96,7 +98,7 @@ public class plasmaCrawlRobotsTxt {
if (!(robotsTableFile.delete())) throw new RuntimeException("cannot delete robots.txt database");
try {
robotsTableFile.getParentFile().mkdirs();
- robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000, 256, 512));
+            robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, this.bufferkb * 1024, 256, 512));
} catch (IOException e){
serverLog.logSevere("PLASMA", "robotsTxt.resetDatabase", e);
}
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 9aebd3135..36fbdf360 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -256,19 +256,21 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int ramMessage = (int) getConfigLong("ramCacheMessage", 1024) / 1024;
int ramWiki = (int) getConfigLong("ramCacheWiki", 1024) / 1024;
int ramRobots = (int) getConfigLong("ramCacheRobots",1024) / 1024;
- this.log.logConfig("LURL Cache memory = " + ppRamString(ramLURL));
- this.log.logConfig("NURL Cache memory = " + ppRamString(ramNURL));
- this.log.logConfig("EURL Cache memory = " + ppRamString(ramEURL));
- this.log.logConfig("RWI Cache memory = " + ppRamString(ramRWI));
- this.log.logConfig("HTTP Cache memory = " + ppRamString(ramHTTP));
- this.log.logConfig("Message Cache memory = " + ppRamString(ramMessage));
- this.log.logConfig("Wiki Cache memory = " + ppRamString(ramWiki));
- this.log.logConfig("Robots Cache memory = " + ppRamString(ramRobots));
+        int ramProfiles = (int) getConfigLong("ramCacheProfiles",1024) / 1024;
+ this.log.logConfig("LURL Cache memory = " + ppRamString(ramLURL));
+ this.log.logConfig("NURL Cache memory = " + ppRamString(ramNURL));
+ this.log.logConfig("EURL Cache memory = " + ppRamString(ramEURL));
+ this.log.logConfig("RWI Cache memory = " + ppRamString(ramRWI));
+ this.log.logConfig("HTTP Cache memory = " + ppRamString(ramHTTP));
+ this.log.logConfig("Message Cache memory = " + ppRamString(ramMessage));
+ this.log.logConfig("Wiki Cache memory = " + ppRamString(ramWiki));
+ this.log.logConfig("Robots Cache memory = " + ppRamString(ramRobots));
+ this.log.logConfig("Profiles Cache memory = " + ppRamString(ramProfiles));
// make crawl profiles database and default profiles
this.log.logConfig("Initializing Crawl Profiles");
File profilesFile = new File(this.plasmaPath, "crawlProfiles0.db");
- this.profiles = new plasmaCrawlProfile(profilesFile);
+ this.profiles = new plasmaCrawlProfile(profilesFile, ramProfiles);
initProfiles();
log.logConfig("Loaded profiles from file " + profilesFile + ", " + this.profiles.size() + " entries");
@@ -501,7 +503,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
final File pdb = new File(plasmaPath, "crawlProfiles0.db");
if (pdb.exists()) pdb.delete();
try {
- profiles = new plasmaCrawlProfile(pdb);
+ int ramProfiles = (int) getConfigLong("ramCacheProfiles",1024) / 1024;
+ profiles = new plasmaCrawlProfile(pdb, ramProfiles);
initProfiles();
} catch (IOException e) {}
}
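
Note the unit chain across the layers: yacy.init stores bytes, the switchboard divides by 1024 to get the kilobyte figure that it logs and passes on, and plasmaCrawlProfile multiplies by 1024 again before handing the buffer to kelondroDyn. Worked through with the default added below:

    // yacy.init:          ramCacheProfiles = 8192   (bytes)
    // plasmaSwitchboard:  8192 / 1024 = 8           (KB, the ramProfiles variable)
    // plasmaCrawlProfile: 8 * 1024 = 8192           (bytes, kelondroDyn buffer size)

For scale, the robots cache default doubles from 1048576 bytes (1 MB) to 2097152 bytes (2 MB), while the new profiles cache gets only 8 KB: the profile database holds just one small record per started crawl, so little space is needed.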
diff --git a/yacy.init b/yacy.init
index b46805955..5fca7ffd5 100644
--- a/yacy.init
+++ b/yacy.init
@@ -465,7 +465,10 @@ ramCacheWiki = 8192
ramCacheNews = 8192
# ram cache for robotsTxt.db
-ramCacheRobots = 1048576
+ramCacheRobots = 2097152
+
+# ram cache for crawlProfiles0.db
+ramCacheProfiles = 8192
# default memory settings for startup of yacy
# is only valid in unix/shell environments and