diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index a9cf922d9..4a96a6ed8 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -255,6 +255,23 @@ Increasing this cache may speed up access to the wiki pages. Increasing this cache may speed up the peer-ping. + +Robots.txt DB +#[chunkRobots]# +#[slreqRobots]# +#[slempRobots]# +#[slhigRobots]# +#[slmedRobots]# +#[sllowRobots]# +#[usedRobots]# + +#[dfltRobots]# +#[goodRobots]# +#[bestRobots]# +The Robots.txt DB stores downloaded records from robots.txt files. +Increasing this cache may speed up validation of whether crawling of a URL is allowed. + + Totals #[usedTotal]# MB diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 36a279007..a17f14577 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -43,19 +43,15 @@ //javac -classpath .:../classes PerformanceMemory_p.java //if the shell's current path is HTROOT -import java.util.Iterator; import java.util.Map; import java.io.File; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import de.anomic.server.serverThread; import de.anomic.server.serverFileUtils; import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeedDB; public class PerformanceMemory_p { @@ -88,6 +84,7 @@ public class PerformanceMemory_p { env.setConfig("ramCacheMessage", Long.parseLong(post.get("ramCacheMessage", "0")) * KB); env.setConfig("ramCacheWiki", Long.parseLong(post.get("ramCacheWiki", "0")) * KB); env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB); + env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB); } if (post.containsKey("setDefault")) { env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI"))); @@ -99,6 +96,7 @@ public class 
PerformanceMemory_p { env.setConfig("ramCacheMessage", Long.parseLong((String) defaultSettings.get("ramCacheMessage"))); env.setConfig("ramCacheWiki", Long.parseLong((String) defaultSettings.get("ramCacheWiki"))); env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews"))); + env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots"))); } if (post.containsKey("setGood")) set = "setGood"; if (post.containsKey("setBest")) set = "setBest"; @@ -193,6 +191,11 @@ public class PerformanceMemory_p { slt = yacyCore.newsPool.dbCacheFillStatus(); putprop(prop, env, "News", set); + req = sb.robots.size(); + chk = sb.robots.dbCacheChunkSize(); + slt = sb.robots.dbCacheFillStatus(); + putprop(prop, env, "Robots", set); + prop.put("usedTotal", usedTotal / MB); prop.put("currTotal", currTotal / MB); prop.put("dfltTotal", dfltTotal / MB); diff --git a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java index a80d870b1..6134c1100 100644 --- a/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java +++ b/source/de/anomic/plasma/plasmaCrawlRobotsTxt.java @@ -57,28 +57,37 @@ import java.util.Map; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroMap; import de.anomic.kelondro.kelondroException; +import de.anomic.kelondro.kelondroRecords; import de.anomic.server.logging.serverLog; public class plasmaCrawlRobotsTxt { private kelondroMap robotsTable; private File robotsTableFile; - public plasmaCrawlRobotsTxt(File robotsTableFile) throws IOException { + public plasmaCrawlRobotsTxt(File robotsTableFile, int bufferkb) throws IOException { this.robotsTableFile = robotsTableFile; if (robotsTableFile.exists()) { try { - robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000)); + robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024)); } catch (kelondroException e) { robotsTableFile.delete(); 
robotsTableFile.getParentFile().mkdirs(); - robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000, 256, 512)); + robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024, 256, 512)); } } else { robotsTableFile.getParentFile().mkdirs(); - robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000, 256, 512)); + robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024, 256, 512)); } } + public int[] dbCacheChunkSize() { + return robotsTable.cacheChunkSize(); + } + + public int[] dbCacheFillStatus() { + return robotsTable.cacheFillStatus(); + } + private void resetDatabase() { // deletes the robots.txt database and creates a new one if (robotsTable != null) try { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index b84065f4e..9aebd3135 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -255,6 +255,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int ramHTTP = (int) getConfigLong("ramCacheHTTP", 1024) / 1024; int ramMessage = (int) getConfigLong("ramCacheMessage", 1024) / 1024; int ramWiki = (int) getConfigLong("ramCacheWiki", 1024) / 1024; + int ramRobots = (int) getConfigLong("ramCacheRobots",1024) / 1024; this.log.logConfig("LURL Cache memory = " + ppRamString(ramLURL)); this.log.logConfig("NURL Cache memory = " + ppRamString(ramNURL)); this.log.logConfig("EURL Cache memory = " + ppRamString(ramEURL)); @@ -262,6 +263,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser this.log.logConfig("HTTP Cache memory = " + ppRamString(ramHTTP)); this.log.logConfig("Message Cache memory = " + ppRamString(ramMessage)); this.log.logConfig("Wiki Cache memory = " + ppRamString(ramWiki)); + this.log.logConfig("Robots Cache memory = " + ppRamString(ramRobots)); // make crawl profiles database and default profiles 
this.log.logConfig("Initializing Crawl Profiles"); @@ -273,7 +275,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // loading the robots.txt db this.log.logConfig("Initializing robots.txt DB"); File robotsDBFile = new File(this.plasmaPath, "crawlRobotsTxt.db"); - this.robots = new plasmaCrawlRobotsTxt(robotsDBFile); + this.robots = new plasmaCrawlRobotsTxt(robotsDBFile, ramRobots); this.log.logConfig("Loaded robots.txt DB from file " + robotsDBFile + ", " + this.robots.size() + " entries"); // start indexing management diff --git a/yacy.init b/yacy.init index 3bb92cf1d..b46805955 100644 --- a/yacy.init +++ b/yacy.init @@ -464,6 +464,9 @@ ramCacheWiki = 8192 # ram cache for news1.db ramCacheNews = 8192 +# ram cache for crawlRobotsTxt.db +ramCacheRobots = 1048576 + # default memory settings for startup of yacy # is only valid in unix/shell environments and # not for first startup of YaCy