*) Adding robots.txt db to Performance Settings for Memory menu

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@785 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 3274ae725e
commit fe6a6abc0b

@ -255,6 +255,23 @@ Increasing this cache may speed up access to the wiki pages.</td>
Increasing this cache may speed up the peer-ping.</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">Robots.txt DB</td>
<td class="small" align="center">#[chunkRobots]#</td>
<td class="small" align="right">#[slreqRobots]#</td>
<td class="small" align="right">#[slempRobots]#</td>
<td class="small" align="right">#[slhigRobots]#</td>
<td class="small" align="right">#[slmedRobots]#</td>
<td class="small" align="right">#[sllowRobots]#</td>
<td class="small" align="right">#[usedRobots]#</td>
<td class="small" align="right"><input name="ramCacheRobots" type="text" align="right" size="5" maxlength="6" value="#[ramCacheRobots]#"></td>
<td class="small" align="right">#[dfltRobots]#</td>
<td class="small" align="right">#[goodRobots]#</td>
<td class="small" align="right">#[bestRobots]#</td>
<td class="small" align="left">The Robots.txt DB stores downloaded records from robots.txt files.
Increasing this cache may speed up checking whether crawling of a URL is allowed.</td>
</tr>
<tr class="TableCellSummary">
<td class="small" align="left" colspan="7">Totals</td>
<td class="small" align="right">#[usedTotal]# MB</td>

@ -43,19 +43,15 @@
//javac -classpath .:../classes PerformanceMemory_p.java
//if the shell's current path is HTROOT
import java.util.Iterator;
import java.util.Map;
import java.io.File;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverThread;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeedDB;
public class PerformanceMemory_p {
@ -88,6 +84,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheMessage", Long.parseLong(post.get("ramCacheMessage", "0")) * KB);
env.setConfig("ramCacheWiki", Long.parseLong(post.get("ramCacheWiki", "0")) * KB);
env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB);
env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB);
}
if (post.containsKey("setDefault")) {
env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI")));
@ -99,6 +96,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheMessage", Long.parseLong((String) defaultSettings.get("ramCacheMessage")));
env.setConfig("ramCacheWiki", Long.parseLong((String) defaultSettings.get("ramCacheWiki")));
env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews")));
env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots")));
}
if (post.containsKey("setGood")) set = "setGood";
if (post.containsKey("setBest")) set = "setBest";
@ -193,6 +191,11 @@ public class PerformanceMemory_p {
slt = yacyCore.newsPool.dbCacheFillStatus();
putprop(prop, env, "News", set);
req = sb.robots.size();
chk = sb.robots.dbCacheChunkSize();
slt = sb.robots.dbCacheFillStatus();
putprop(prop, env, "Robots", set);
prop.put("usedTotal", usedTotal / MB);
prop.put("currTotal", currTotal / MB);
prop.put("dfltTotal", dfltTotal / MB);

@ -57,26 +57,35 @@ import java.util.Map;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroMap;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
public class plasmaCrawlRobotsTxt {
private kelondroMap robotsTable;
private File robotsTableFile;
// NOTE(review): this span is rendered from a diff, so removed and added lines appear together.
// Presumably each line using the fixed 1000000 buffer is the removed form and the line
// directly after it, using bufferkb * 1024, is its replacement - confirm against the real file.
//
// Opens the robots.txt table stored in robotsTableFile and keeps the file reference
// in this.robotsTableFile. bufferkb is multiplied by 1024 before being handed to kelondroDyn.
// NOTE(review): bufferkb * 1024 is int arithmetic and overflows for bufferkb > 2097151;
// the caller shown in plasmaSwitchboard passes ramCacheRobots / 1024 - confirm the range.
public plasmaCrawlRobotsTxt(File robotsTableFile) throws IOException {
public plasmaCrawlRobotsTxt(File robotsTableFile, int bufferkb) throws IOException {
this.robotsTableFile = robotsTableFile;
if (robotsTableFile.exists()) {
try {
// file is already on disk: open it with the two-argument kelondroDyn constructor
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000));
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024));
} catch (kelondroException e) {
// opening failed: delete the file, make sure the parent directory exists, and
// build the table again with the four-argument constructor (presumably a fresh, empty db)
robotsTableFile.delete();
robotsTableFile.getParentFile().mkdirs();
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000, 256, 512));
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024, 256, 512));
}
} else {
// no file yet: make sure the parent directory exists, then use the four-argument constructor
robotsTableFile.getParentFile().mkdirs();
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, 1000000, 256, 512));
robotsTable = new kelondroMap(new kelondroDyn(robotsTableFile, bufferkb * 1024, 256, 512));
}
}
/**
 * Exposes the cache chunk size figures of the underlying robots.txt table.
 * The memory performance page reads this value for its "Robots" row.
 *
 * @return whatever {@code robotsTable.cacheChunkSize()} reports, passed through unchanged
 */
public int[] dbCacheChunkSize() {
    final int[] chunkSizes = this.robotsTable.cacheChunkSize();
    return chunkSizes;
}
/**
 * Exposes the cache fill status figures of the underlying robots.txt table.
 * The memory performance page reads this value for its "Robots" row.
 *
 * @return whatever {@code robotsTable.cacheFillStatus()} reports, passed through unchanged
 */
public int[] dbCacheFillStatus() {
    final int[] fillStatus = this.robotsTable.cacheFillStatus();
    return fillStatus;
}
private void resetDatabase() {

@ -255,6 +255,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int ramHTTP = (int) getConfigLong("ramCacheHTTP", 1024) / 1024;
int ramMessage = (int) getConfigLong("ramCacheMessage", 1024) / 1024;
int ramWiki = (int) getConfigLong("ramCacheWiki", 1024) / 1024;
int ramRobots = (int) getConfigLong("ramCacheRobots",1024) / 1024;
this.log.logConfig("LURL Cache memory = " + ppRamString(ramLURL));
this.log.logConfig("NURL Cache memory = " + ppRamString(ramNURL));
this.log.logConfig("EURL Cache memory = " + ppRamString(ramEURL));
@ -262,6 +263,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.log.logConfig("HTTP Cache memory = " + ppRamString(ramHTTP));
this.log.logConfig("Message Cache memory = " + ppRamString(ramMessage));
this.log.logConfig("Wiki Cache memory = " + ppRamString(ramWiki));
this.log.logConfig("Robots Cache memory = " + ppRamString(ramRobots));
// make crawl profiles database and default profiles
this.log.logConfig("Initializing Crawl Profiles");
@ -273,7 +275,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// loading the robots.txt db
this.log.logConfig("Initializing robots.txt DB");
File robotsDBFile = new File(this.plasmaPath, "crawlRobotsTxt.db");
this.robots = new plasmaCrawlRobotsTxt(robotsDBFile);
this.robots = new plasmaCrawlRobotsTxt(robotsDBFile, ramRobots);
this.log.logConfig("Loaded robots.txt DB from file " + robotsDBFile + ", " + this.robots.size() + " entries");
// start indexing management

@ -464,6 +464,9 @@ ramCacheWiki = 8192
# ram cache for news1.db
ramCacheNews = 8192
# ram cache for crawlRobotsTxt.db
ramCacheRobots = 1048576
# default memory settings for startup of yacy
# is only valid in unix/shell environments and
# not for first startup of YaCy

Loading…
Cancel
Save