*) proxyCache and proxyCacheSize can now be changed under 'Proxy Indexing'
   - paths are now absolute
*) moved the path check from plasmaHTCache to plasmaSwitchboard
   - the path is checked only once, at startup
*) other small changes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@606 6c8d7289-2bf4-0310-a012-ef5d649a1542
borg-0300 20 years ago
parent 3dfda1c9da
commit bf14e6def5
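For reference, a minimal sketch of the two configuration entries this commit exposes on the 'Proxy Indexing' page; the entry names are real, the values shown are only the defaults used in ProxyIndexingMonitor_p.java below:

    proxyCache=DATA/HTCACHE
    proxyCacheSize=64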
CacheAdmin_p.java
@@ -50,7 +50,6 @@ import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream; import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
@@ -67,18 +66,20 @@ public class CacheAdmin_p {
private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
public static String dateString(Date date) { public static String dateString(Date date) {
return SimpleFormatter.format(date); return SimpleFormatter.format(date);
} }
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
plasmaSwitchboard switchboard = (plasmaSwitchboard) env; plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
String action = ((post == null) ? "info" : post.get("action", "info")); String action = ((post == null) ? "info" : post.get("action", "info"));
String pathString = ((post == null) ? "" : post.get("path", "/")); String pathString = ((post == null) ? "" : post.get("path", "/"));
String fileString = pathString; String fileString = pathString;
File cache = new File(switchboard.getRootPath(), switchboard.getConfig("proxyCache", "DATA/HTCACHE"));
// we dont need check the path, because we have do that in plasmaSwitchboard.java - Borg-0300
File cache = new File(switchboard.getConfig("proxyCache", "DATA/HTCACHE").toString());
File file = new File(cache, pathString); File file = new File(cache, pathString);
File dir; File dir;
URL url = plasmaHTCache.getURL(cache, file); URL url = plasmaHTCache.getURL(cache, file);
@@ -89,6 +90,7 @@ public class CacheAdmin_p {
dir = file.getParentFile(); dir = file.getParentFile();
pathString = (new File(pathString)).getParent().replace('\\','/'); pathString = (new File(pathString)).getParent().replace('\\','/');
} }
// generate dir listing // generate dir listing
String[] list = dir.list(); String[] list = dir.list();
File f; String tree = "Directory of<br>" + ((pathString.length() == 0) ? "domain list" : linkPathString(pathString)) + "<br><br>"; File f; String tree = "Directory of<br>" + ((pathString.length() == 0) ? "domain list" : linkPathString(pathString)) + "<br><br>";
@@ -97,10 +99,11 @@ public class CacheAdmin_p {
else { else {
for (int i = 0; i < list.length; i++) { for (int i = 0; i < list.length; i++) {
f = new File(dir, list[i]); f = new File(dir, list[i]);
if (f.isDirectory()) if (!f.getName().equalsIgnoreCase("responseHeader.db"))
tree += "<img src=\"/env/grafics/folderIconSmall.gif\" align=\"top\" alt=\"Folder\">&nbsp;<a href=\"CacheAdmin_p.html?action=info&path=" + pathString + "/" + list[i] + "\" class=\"tt\">" + list[i] + "</a><br>" + serverCore.crlfString; if (f.isDirectory())
else tree += "<img src=\"/env/grafics/folderIconSmall.gif\" align=\"top\" alt=\"Folder\">&nbsp;<a href=\"CacheAdmin_p.html?action=info&path=" + pathString + "/" + list[i] + "\" class=\"tt\">" + list[i] + "</a><br>" + serverCore.crlfString;
tree += "<img src=\"/env/grafics/fileIconSmall.gif\" align=\"top\" alt=\"File\">&nbsp;<a href=\"CacheAdmin_p.html?action=info&path=" + pathString + "/" + list[i] + "\" class=\"tt\">" + list[i] + "</a><br>" + serverCore.crlfString; else
tree += "<img src=\"/env/grafics/fileIconSmall.gif\" align=\"top\" alt=\"File\">&nbsp;<a href=\"CacheAdmin_p.html?action=info&path=" + pathString + "/" + list[i] + "\" class=\"tt\">" + list[i] + "</a><br>" + serverCore.crlfString;
} }
} }
@@ -108,7 +111,7 @@ public class CacheAdmin_p {
if (action.equals("info")) { if (action.equals("info")) {
if (!(file.isDirectory())) { if (!(file.isDirectory())) {
String urls = htmlFilterContentScraper.urlNormalform(url); String urls = htmlFilterContentScraper.urlNormalform(url);
info += "<b>Info for URL <a href=\"" + urls + "\">" + urls + "</a>:</b><br><br>"; info += "<b>Info for URL <a href=\"" + urls + "\">" + urls + "</a>:</b><br><br>";
try { try {
httpHeader fileheader = switchboard.cacheManager.getCachedResponse(plasmaURL.urlHash(url)); httpHeader fileheader = switchboard.cacheManager.getCachedResponse(plasmaURL.urlHash(url));
@@ -141,13 +144,12 @@ public class CacheAdmin_p {
} }
} }
// prop.put("cachesize", Long.toString(switchboard.cacheManager.currCacheSize/1024));
prop.put("cachesize", Long.toString(switchboard.cacheManager.currCacheSize/1024)); prop.put("cachemax", Long.toString(switchboard.cacheManager.maxCacheSize/1024));
prop.put("cachemax", Long.toString(switchboard.cacheManager.maxCacheSize/1024));
prop.put("tree", tree); prop.put("tree", tree);
prop.put("info", info); prop.put("info", info);
// return rewrite properties // return rewrite properties
return prop; return prop;
} }
private static String formatHeader(httpHeader header) { private static String formatHeader(httpHeader header) {
@@ -193,5 +195,4 @@ public class CacheAdmin_p {
} }
return result; return result;
} }
} }
CacheResource_p.java
@@ -41,7 +41,7 @@
// You must compile this file with // You must compile this file with
// javac -classpath .:../Classes Message.java // javac -classpath .:../classes CacheResource_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File; import java.io.File;
@@ -56,11 +56,14 @@ import de.anomic.server.serverSwitch;
public class CacheResource_p { public class CacheResource_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
plasmaSwitchboard switchboard = (plasmaSwitchboard) env; plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
String path = ((post == null) ? "" : post.get("path", ""));
// we dont need check the path, because we have do that in plasmaSwitchboard.java - Borg-0300
File cache = new File(switchboard.getConfig("proxyCache", "DATA/HTCACHE").toString());
String path = ((post == null) ? "" : post.get("path", ""));
File cache = new File(switchboard.getRootPath(), switchboard.getConfig("proxyCache", "DATA/HTCACHE"));
File f = new File(cache, path); File f = new File(cache, path);
byte[] resource; byte[] resource;
@@ -70,7 +73,6 @@ public class CacheResource_p {
} catch (IOException e) { } catch (IOException e) {
prop.put("resource", new byte[0]); prop.put("resource", new byte[0]);
} }
return prop; return prop;
} }
} }
ProxyIndexingMonitor_p.html
@@ -18,12 +18,16 @@ and automatically excluded from indexing.
</p> </p>
<p><form action="ProxyIndexingMonitor_p.html" method="post" enctype="multipart/form-data"> <p><form action="ProxyIndexingMonitor_p.html" method="post" enctype="multipart/form-data">
<div class=small><b>Proxy pre-fetch setting:</b>
this is an automated html page loading procedure that takes actual proxy-requested
URLs as crawling start points for crawling.</div>
<table border="0" cellpadding="5" cellspacing="0" width="100%"> <table border="0" cellpadding="5" cellspacing="0" width="100%">
<tr class="TableCellLight">
<td colspan="3"><div class=small><b>Proxy pre-fetch setting:</b>
this is an automated html page loading procedure that takes actual proxy-requested
URLs as crawling start points for crawling.</div></td>
</tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td class=small>Prefetch Depth:</td> <td class=small>Prefetch Depth</td>
<td class=small><input name="proxyPrefetchDepth" type="text" size="2" maxlength="2" value="#[proxyPrefetchDepth]#"></td> <td class=small><input name="proxyPrefetchDepth" type="text" size="2" maxlength="2" value="#[proxyPrefetchDepth]#"></td>
<td class=small> <td class=small>
A prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all A prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
@@ -31,29 +35,52 @@ URLs as crawling start points for crawling.</div>
this means that only embedded href-anchors are prefetched additionally.</td> this means that only embedded href-anchors are prefetched additionally.</td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td class=small>Store to Cache:</td> <td class=small>Store to Cache</td>
<td class=small><input type="checkbox" name="proxyStoreHTCache" align="top" #(proxyStoreHTCacheChecked)#::checked#(/proxyStoreHTCacheChecked)#></td> <td class=small><input type="checkbox" name="proxyStoreHTCache" align="top" #(proxyStoreHTCacheChecked)#::checked#(/proxyStoreHTCacheChecked)#></td>
<td class=small>It is almost always recommended to set this on. The only exception is that you have another caching proxy running as secondary proxy and YaCy is configured to used that proxy in proxy-proxy - mode.</td> <td class=small>It is almost always recommended to set this on. The only exception is that you have another caching proxy running as secondary proxy and YaCy is configured to used that proxy in proxy-proxy - mode.</td>
</tr> </tr>
<tr class="TableCellLight">
<td colspan="3"><div class=small><b>Proxy generally</b></div></td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Path</td>
<td class=small><input name="proxyCache" type="text" size="20" maxlength="80" value="#[proxyCache]#"></td>
<td class=small>The path where the pages are stored (max. length 80)</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Size</td>
<td class=small><input name="proxyCacheSize" type="text" size="8" maxlength="24" value="#[proxyCacheSize]#"></td>
<td class=small>The size in MB of the cache.</td>
</tr>
<tr valign="top" class="TableCellLight"> <tr valign="top" class="TableCellLight">
<td class=small></td> <td class=small colspan="1">&nbsp;</td>
<td class=small></td> <td class=small colspan="2"><input type="submit" name="proxyprofileset" value="set proxy profile"></td>
<td class=small><input type="submit" name="proxyprofileset" value="set proxy profile"></td>
</tr> </tr>
</table> </table>
</form></p> </form></p>
<p>
#(info)# #(info)#
<!-- info 0 -->
:: ::
<br><b>The file DATA/PLASMADB/crawlProfiles0.db is missing or corrupted. <!-- info 1 -->
<b>The file DATA/PLASMADB/crawlProfiles0.db is missing or corrupted.
Please delete that file and restart.</b><br> Please delete that file and restart.</b><br>
:: ::
<br> <!-- info 2 -->
<b>Proxy pre-fetch is now set to depth-#[message]#.</b><br> <b>Pre-fetch is now set to depth-#[message]#.</b><br>
<b>Proxy caching is now set #(caching)#off::on#(/caching)#.</b><br> <b>Caching is now #(caching)#off::on#(/caching)#.</b><br>
#(path)#::<b>Cachepath is now set to '#[return]#'.</b> Please move the old data in the new directory.<br>#(/path)#
#(size)#::<b>Cachesize is now set to #[return]#MB.</b><br>#(/size)#
#(restart)#::<br><font color="red"><b>Changes will take effect after restart only.</b></font><br>#(/restart)#
:: ::
<br><b>An error has occurred: #[error]#.</b><br> <!-- info 3 -->
<b>An error has occurred: #[error]#.</b><br>
#(/info)# #(/info)#
</p>
<p>You can see a snapshot of recently indexed pages <p>You can see a snapshot of recently indexed pages
on the <a href="/IndexMonitor.html?process=4">Proxy Index Monitor</a> Page. on the <a href="/IndexMonitor.html?process=4">Proxy Index Monitor</a> Page.
ProxyIndexingMonitor_p.java
@@ -40,13 +40,14 @@
// Contributions and changes to the program code must be marked as such. // Contributions and changes to the program code must be marked as such.
// You must compile this file with // You must compile this file with
// javac -classpath .:../Classes Settings_p.java // javac -classpath .:../classes ProxyIndexingMonitor_p.java
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.text.SimpleDateFormat; // import java.text.SimpleDateFormat;
import java.util.Date; // import java.util.Date;
import java.util.Locale; // import java.util.Locale;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlProfile;
@@ -54,22 +55,24 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyCore;
public class ProxyIndexingMonitor_p { public class ProxyIndexingMonitor_p {
private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); // private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
private static String daydate(Date date) { // private static String daydate(Date date) {
if (date == null) return ""; else return dayFormatter.format(date); // if (date == null) return ""; else return dayFormatter.format(date);
} // }
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements // return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env; plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
serverObjects prop = new serverObjects(); serverObjects prop = new serverObjects();
// int showIndexedCount = 20;
// boolean se = false;
int showIndexedCount = 20; String oldProxyCache, newProxyCache;
boolean se = false; String oldProxyCacheSize, newProxyCacheSize;
prop.put("info", 0); prop.put("info", 0);
prop.put("info_message", ""); prop.put("info_message", "");
@@ -83,20 +86,58 @@ public class ProxyIndexingMonitor_p {
boolean proxyStoreHTCache = ((String) post.get("proxyStoreHTCache", "")).equals("on"); boolean proxyStoreHTCache = ((String) post.get("proxyStoreHTCache", "")).equals("on");
env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false"); env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false");
// added proxyCache, proxyCacheSize - Borg-0300
// proxyCache - check and create the directory
oldProxyCache = env.getConfig("proxyCache", "DATA/HTCACHE");
newProxyCache = ((String) post.get("proxyCache", "DATA/HTCACHE"));
newProxyCache = newProxyCache.replace("\\", "/");
if (newProxyCache.endsWith("/")) newProxyCache.substring(0, newProxyCache.length() - 1);
File cp = new File(newProxyCache);
if ((!cp.isDirectory()) && (!cp.isFile())) cp.mkdirs();
env.setConfig("proxyCache", newProxyCache);
// proxyCacheSize
oldProxyCacheSize = Integer.toString(Integer.parseInt(env.getConfig("proxyCacheSize", "64")));
newProxyCacheSize = Integer.toString(Integer.parseInt((String) post.get("proxyCacheSize", "64")));
env.setConfig("proxyCacheSize", newProxyCacheSize);
// implant these settings also into the crawling profile for the proxy // implant these settings also into the crawling profile for the proxy
plasmaCrawlProfile.entry profile = switchboard.profiles.getEntry(switchboard.getConfig("defaultProxyProfile", "")); plasmaCrawlProfile.entry profile = switchboard.profiles.getEntry(switchboard.getConfig("defaultProxyProfile", ""));
if (profile == null) { if (profile == null) {
prop.put("info", 1);//delete DATA/PLASMADB/crawlProfiles0.db prop.put("info", 1); //delete DATA/PLASMADB/crawlProfiles0.db
} else { } else {
try { try {
profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth)); profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth));
profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false"); profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false");
prop.put("info", 2);//new proxyPrefetchdepth prop.put("info", 2);//new proxyPrefetchdepth
prop.put("info_message", newProxyPrefetchDepth); prop.put("info_message", newProxyPrefetchDepth);
prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0); prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0);
// proxyCache - only display on change
if (oldProxyCache.compareTo(newProxyCache) == 0) {
prop.put("info_path", 0);
prop.put("info_path_return", oldProxyCache);
} else {
prop.put("info_path", 1);
prop.put("info_path_return", newProxyCache);
}
// proxyCacheSize - only display on change
if (oldProxyCacheSize.compareTo(newProxyCacheSize) == 0) {
prop.put("info_size", 0);
prop.put("info_size_return", oldProxyCacheSize);
} else {
prop.put("info_size", 1);
prop.put("info_size_return", newProxyCacheSize);
}
// proxyCache, proxyCacheSize we need a restart
prop.put("info_restart", 0);
prop.put("info_restart_return", 0);
if (oldProxyCache.compareTo(newProxyCache) != 0) prop.put("info_restart", 1);
if (oldProxyCacheSize.compareTo(newProxyCacheSize) != 0) prop.put("info_restart", 1);
} catch (IOException e) { } catch (IOException e) {
prop.put("info", 3); //Error: errmsg prop.put("info", 3); //Error: errmsg
prop.put("info_error", e.getMessage()); prop.put("info_error", e.getMessage());
} }
} }
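A note on the trailing-slash handling added above: String.substring returns a new string rather than modifying the receiver, so the statement on the `if (newProxyCache.endsWith("/"))` line has no effect as written. A minimal sketch of the presumably intended form, using the same variable name as above:

    if (newProxyCache.endsWith("/")) newProxyCache = newProxyCache.substring(0, newProxyCache.length() - 1);

The same pattern appears in plasmaHTCache.getCachePath() further down, where the result of remotePath.substring(0, remotePath.indexOf('#')) is likewise discarded.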
@@ -109,8 +150,9 @@ public class ProxyIndexingMonitor_p {
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0); prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0);
prop.put("proxyCache", env.getConfig("proxyCache", "DATA/HTCACHE"));
prop.put("proxyCacheSize", env.getConfig("proxyCacheSize", "64"));
// return rewrite properties // return rewrite properties
return prop; return prop;
} }
} }
htmlFilterContentScraper.java
@@ -113,8 +113,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
int p; int p;
if ((p = us.indexOf("#")) >= 0) us = us.substring(0, p); if ((p = us.indexOf("#")) >= 0) us = us.substring(0, p);
if (us.endsWith(":80")) us = us.substring(0, us.length() - 3); if (us.endsWith(":80")) us = us.substring(0, us.length() - 3);
p = us.indexOf(":80/"); if ((p = us.indexOf(":80/")) >= 0) us = us.substring(0,p).concat(us.substring(p + 3));
if (p >= 0) us = us.substring(0,p).concat(us.substring(p + 3));
if (((us.endsWith("/")) && (us.lastIndexOf('/', us.length() - 2) < 8))) us = us.substring(0, us.length() - 1); if (((us.endsWith("/")) && (us.lastIndexOf('/', us.length() - 2) < 8))) us = us.substring(0, us.length() - 1);
return us; return us;
} }
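To illustrate the normalisation above (assuming `us` already carries the http:// prefix at this point), two hypothetical before/after pairs:

    http://www.example.com:80/      ->  http://www.example.com
    http://www.example.com:80/a#b   ->  http://www.example.com/a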
plasmaHTCache.java
@@ -59,9 +59,9 @@ import java.util.Date;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.Calendar; //import java.util.Calendar;
import java.util.GregorianCalendar; //import java.util.GregorianCalendar;
import java.util.TimeZone; //import java.util.TimeZone;
import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
@@ -89,46 +89,47 @@ public final class plasmaHTCache {
public static serverLog log; public static serverLog log;
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) { public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb) {
//this.switchboard = switchboard; // this.switchboard = switchboard;
this.log = new serverLog("HTCACHE"); this.log = new serverLog("HTCACHE");
this.cachePath = htCachePath; this.cachePath = htCachePath;
this.maxCacheSize = maxCacheSize; this.maxCacheSize = maxCacheSize;
// set cache path // we dont need check the path, because we have do that in plasmaSwitchboard.java - Borg-0300
if (!(htCachePath.exists())) { /* // set cache path
// make the cache path if (!(htCachePath.exists())) {
htCachePath.mkdir(); // make the cache path
} htCachePath.mkdir();
if (!(htCachePath.isDirectory())) { }
// if the cache does not exists or is a file and not a directory, panic if (!(htCachePath.isDirectory())) {
System.out.println("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created"); // if the cache does not exists or is a file and not a directory, panic
System.exit(0); System.out.println("the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
} System.exit(0);
}*/
// open the response header database
File dbfile = new File(cachePath, "responseHeader.db"); // open the response header database
try { File dbfile = new File(cachePath, "responseHeader.db");
try {
if (dbfile.exists()) if (dbfile.exists())
responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400)); responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
else else
responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaCrawlLURL.urlHashLength, 150)); responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaCrawlLURL.urlHashLength, 150));
} catch (IOException e) { } catch (IOException e) {
System.out.println("the request header database could not be opened: " + e.getMessage()); System.out.println("the request header database could not be opened: " + e.getMessage());
System.exit(0); System.exit(0);
} }
// init stack // init stack
cacheStack = new LinkedList(); cacheStack = new LinkedList();
// init cache age and size management // init cache age and size management
cacheAge = new TreeMap(); cacheAge = new TreeMap();
currCacheSize = 0; currCacheSize = 0;
this.maxCacheSize = maxCacheSize; this.maxCacheSize = maxCacheSize;
// start the cache startup thread // start the cache startup thread
// this will collect information about the current cache size and elements // this will collect information about the current cache size and elements
serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000); serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000);
} }
public int size() { public int size() {
@@ -200,16 +201,15 @@ public final class plasmaHTCache {
while ((currCacheSize > maxCacheSize) && (cacheAge.size() > 0)) { while ((currCacheSize > maxCacheSize) && (cacheAge.size() > 0)) {
f = (File) cacheAge.remove(cacheAge.firstKey()); f = (File) cacheAge.remove(cacheAge.firstKey());
if ((f != null) && (f.exists())) { if ((f != null) && (f.exists())) {
currCacheSize -= f.length(); long size = f.length();
//currCacheSize -= f.length();
if (f.delete()) { if (f.delete()) {
log.logInfo("DELETED OLD CACHE : " + f.toString()); log.logInfo("DELETED OLD CACHE : " + f.toString());
currCacheSize -= size;
f = f.getParentFile(); f = f.getParentFile();
if ((f.exists()) && (f.isDirectory())) { if (f.isDirectory() && (f.list().length == 0)) {
// check size of directory // the directory has no files in it; delete it also
if (f.list().length == 0) { if (f.delete()) log.logInfo("DELETED EMPTY DIRECTORY : " + f.toString());
// the directory has no files in it; delete it also
if (f.delete()) log.logInfo("DELETED EMPTY DIRECTORY : " + f.toString());
}
} }
} }
} }
@@ -221,72 +221,71 @@ public final class plasmaHTCache {
} }
private String ageString(long date, File f) { private String ageString(long date, File f) {
StringBuffer sb = new StringBuffer(32); StringBuffer sb = new StringBuffer(32);
String s = Long.toHexString(date); String s = Long.toHexString(date);
for (int i = s.length(); i < 16; i++) sb.append('0'); for (int i = s.length(); i < 16; i++) sb.append('0');
sb.append(s); sb.append(s);
s = Integer.toHexString(f.hashCode()); s = Integer.toHexString(f.hashCode());
for (int i = s.length(); i < 8; i++) sb.append('0'); for (int i = s.length(); i < 8; i++) sb.append('0');
sb.append(s); sb.append(s);
return sb.toString(); return sb.toString();
} }
public void cacheScan() {
//log.logSystem("STARTING CACHE SCANNING");
kelondroMScoreCluster doms = new kelondroMScoreCluster();
int c = 0;
enumerateFiles ef = new enumerateFiles(cachePath, true, false, true, true);
File f;
while (ef.hasMoreElements()) {
c++;
f = (File) ef.nextElement();
long d = f.lastModified();
//System.out.println("Cache: " + dom(f));
doms.incScore(dom(f));
currCacheSize += f.length();
cacheAge.put(ageString(d, f), f);
}
//System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
long ageHours = 0;
try {
ageHours = (System.currentTimeMillis() -
Long.parseLong(((String) cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
} catch (NumberFormatException e) {
//e.printStackTrace();
}
log.logSystem("CACHE SCANNED, CONTAINS " + c +
" FILES = " + currCacheSize/1048576 + "MB, OLDEST IS " +
((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) +
" OLD");
cleanup();
// start to prefetch ip's from dns
String dom;
long start = System.currentTimeMillis();
String ip, result = "";
c = 0;
while ((doms.size() > 0) && (c < 50) && ((System.currentTimeMillis() - start) < 60000)) {
dom = (String) doms.getMaxObject();
ip = httpc.dnsResolve(dom);
if (ip == null) break;
result += ", " + dom + "=" + ip;
log.logSystem("PRE-FILLED " + dom + "=" + ip);
c++;
doms.deleteScore(dom);
// wait a short while to prevent that this looks like a DoS
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
}
if (result.length() > 2) log.logSystem("PRE-FILLED DNS CACHE, FETCHED " + c +
" ADDRESSES: " + result.substring(2));
}
private String dom(File f) { public void cacheScan() {
String s = f.toString().substring(cachePath.toString().length() + 1); //log.logSystem("STARTING CACHE SCANNING");
int p = s.indexOf("/"); kelondroMScoreCluster doms = new kelondroMScoreCluster();
if (p < 0) p = s.indexOf("\\"); int c = 0;
if (p < 0) return null; enumerateFiles ef = new enumerateFiles(cachePath, true, false, true, true);
return s.substring(0, p); File f;
while (ef.hasMoreElements()) {
c++;
f = (File) ef.nextElement();
long d = f.lastModified();
//System.out.println("Cache: " + dom(f));
doms.incScore(dom(f));
currCacheSize += f.length();
cacheAge.put(ageString(d, f), f);
}
//System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
long ageHours = 0;
try {
ageHours = (System.currentTimeMillis() -
Long.parseLong(((String) cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
} catch (NumberFormatException e) {
//e.printStackTrace();
} }
log.logSystem("CACHE SCANNED, CONTAINS " + c +
" FILES = " + currCacheSize/1048576 + "MB, OLDEST IS " +
((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) + " OLD");
cleanup();
// start to prefetch ip's from dns
String dom;
long start = System.currentTimeMillis();
String ip, result = "";
c = 0;
while ((doms.size() > 0) && (c < 50) && ((System.currentTimeMillis() - start) < 60000)) {
dom = (String) doms.getMaxObject();
ip = httpc.dnsResolve(dom);
if (ip == null) break;
result += ", " + dom + "=" + ip;
log.logSystem("PRE-FILLED " + dom + "=" + ip);
c++;
doms.deleteScore(dom);
// wait a short while to prevent that this looks like a DoS
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
}
if (result.length() > 2) log.logSystem("PRE-FILLED DNS CACHE, FETCHED " + c +
" ADDRESSES: " + result.substring(2));
}
private String dom(File f) {
String s = f.toString().substring(cachePath.toString().length() + 1);
int p = s.indexOf("/");
if (p < 0) p = s.indexOf("\\");
if (p < 0) return null;
return s.substring(0, p);
}
public httpHeader getCachedResponse(String urlHash) throws IOException { public httpHeader getCachedResponse(String urlHash) throws IOException {
Map hdb = responseHeaderDB.get(urlHash); Map hdb = responseHeaderDB.get(urlHash);
@@ -295,11 +294,11 @@ public final class plasmaHTCache {
} }
public boolean full() { public boolean full() {
return (cacheStack.size() > stackLimit); return (cacheStack.size() > stackLimit);
} }
public boolean empty() { public boolean empty() {
return (cacheStack.size() == 0); return (cacheStack.size() == 0);
} }
public static boolean isPicture(httpHeader response) { public static boolean isPicture(httpHeader response) {
@@ -343,55 +342,67 @@ public final class plasmaHTCache {
return plasmaParser.mediaExtContains(urlString); return plasmaParser.mediaExtContains(urlString);
} }
// this method creates from a given host and path a cache path /**
* this method creates from a given host and path a cache path
* from a given host (which may also be an IPv4 - number, but not IPv6 or
* a domain; all without leading 'http://') and a path (which must start
* with a leading '/', and may also end in an '/') a path to a file
* in the file system with root as given in cachePath is constructed
* it will also be ensured, that the complete path exists; if necessary
* that path will be generated
* @return URL
*/
public File getCachePath(URL url) { public File getCachePath(URL url) {
// from a given host (which may also be an IPv4 - number, but not IPv6 or // System.out.println("DEBUG: getCachePath: IN=" + url.toString());
// a domain; all without leading 'http://') and a path (which must start String remotePath = url.getPath();
// with a leading '/', and may also end in an '/') a path to a file if (!(remotePath.startsWith("/"))) remotePath = "/" + remotePath;
// in the file system with root as given in cachePath is constructed if (remotePath.endsWith("/")) remotePath = remotePath + "ndx";
// it will also be ensured, that the complete path exists; if necessary
// that path will be generated
//System.out.println("DEBUG: getCachedPath=" + url.toString());
String remotePath = url.getPath();
if (!(remotePath.startsWith("/"))) remotePath = "/" + remotePath;
if (remotePath.endsWith("/")) remotePath = remotePath + "ndx";
if (remotePath.indexOf('#') > 0) remotePath.substring(0, remotePath.indexOf('#')); if (remotePath.indexOf('#') > 0) remotePath.substring(0, remotePath.indexOf('#'));
remotePath = remotePath.replace('?', '_'); remotePath = remotePath.replace('?', '_');
remotePath = remotePath.replace('&', '_'); // yes this is not reversible, but that is not needed remotePath = remotePath.replace('&', '_'); // yes this is not reversible, but that is not needed
remotePath = remotePath.replace(':', '_'); // yes this is not reversible, but that is not needed remotePath = remotePath.replace(':', '_'); // yes this is not reversible, but that is not needed
int port = url.getPort(); int port = url.getPort();
if (port < 0) port = 80; if (port < 0) port = 80;
return new File(this.cachePath, url.getHost() + ((port == 80) ? "" : ("+" + port)) + remotePath); // System.out.println("DEBUG: getCachePath: OUT=" + url.getHost() + ((port == 80) ? "" : ("+" + port)) + remotePath);
return new File(this.cachePath, url.getHost() + ((port == 80) ? "" : ("+" + port)) + remotePath);
} }
/**
* this is the reverse function to getCachePath: it constructs the url as string
* from a given storage path
*/
public static URL getURL(File cachePath, File f) { public static URL getURL(File cachePath, File f) {
// this is the reverse function to getCachePath: it constructs the url as string // System.out.println("DEBUG: getURL: IN: Path=[" + cachePath + "]");
// from a given storage path // System.out.println("DEBUG: getURL: IN: File=[" + f + "]");
String s = f.toString().replace('\\', '/'); String s = f.toString().replace('\\', '/');
String c = cachePath.toString().replace('\\', '/'); String c = cachePath.toString().replace('\\', '/');
//System.out.println("DEBUG: getURL for c=" + c + ", s=" + s); int p = s.lastIndexOf(c);
int p = s.lastIndexOf(c); if (p >= 0) {
if (p >= 0) { s = s.substring(p + c.length());
s = s.substring(p + c.length()); while (s.startsWith("/")) s = s.substring(1);
while (s.startsWith("/")) s = s.substring(1); if ((p = s.indexOf("+")) >= 0) {
if ((p = s.indexOf("+")) >= 0) {
s = s.substring(0, p) + ":" + s.substring(p + 1); s = s.substring(0, p) + ":" + s.substring(p + 1);
} else { /* } else {
p = s.indexOf("/"); p = s.indexOf("/");
if (p < 0) if (p < 0)
s = s + ":80/"; s = s + ":80/";
else else
s = s.substring(0, p) + ":80" + s.substring(p); s = s.substring(0, p) + ":80" + s.substring(p);*/
} }
if (s.endsWith("ndx")) s = s.substring(0, s.length() - 3); if (s.endsWith("ndx")) s = s.substring(0, s.length() - 3);
//System.out.println("DEBUG: getURL url=" + s); // System.out.println("DEBUG: getURL: OUT=" + s);
try { try {
/* URL url = null;
url = new URL("http://" + s);
System.out.println("DEBUG: getURL: URL=" + url.toString());
return url;//new URL("http://" + s); */
return new URL("http://" + s); return new URL("http://" + s);
} catch (Exception e) { } catch (Exception e) {
return null; return null;
} }
} }
return null; return null;
} }
public byte[] loadResource(URL url) { public byte[] loadResource(URL url) {
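To make the mapping implemented by getCachePath()/getURL() above concrete, two hypothetical examples, assuming cachePath points at DATA/HTCACHE:

    http://www.example.com/index.html   ->  DATA/HTCACHE/www.example.com/index.html
    http://www.example.com:8080/dir/    ->  DATA/HTCACHE/www.example.com+8080/dir/ndx

getURL() reverses the mapping: '+' becomes ':' again and a trailing 'ndx' is dropped, so the second file maps back to http://www.example.com:8080/dir/.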
@@ -407,8 +418,8 @@ public final class plasmaHTCache {
} }
public static boolean isPOST(String urlString) { public static boolean isPOST(String urlString) {
return ((urlString.indexOf("?") >= 0) || return ((urlString.indexOf("?") >= 0) ||
(urlString.indexOf("&") >= 0)); (urlString.indexOf("&") >= 0));
} }
public static boolean isCGI(String urlString) { public static boolean isCGI(String urlString) {
@@ -421,8 +432,8 @@ public final class plasmaHTCache {
} }
public Entry newEntry(Date initDate, int depth, URL url, String name, public Entry newEntry(Date initDate, int depth, URL url, String name,
httpHeader requestHeader, httpHeader requestHeader,
String responseStatus, httpHeader responseHeader, String responseStatus, httpHeader responseHeader,
String initiator, String initiator,
plasmaCrawlProfile.entry profile) { plasmaCrawlProfile.entry profile) {
return new Entry(initDate, depth, url, name, requestHeader, responseStatus, responseHeader, initiator, profile); return new Entry(initDate, depth, url, name, requestHeader, responseStatus, responseHeader, initiator, profile);
@@ -430,108 +441,108 @@ public final class plasmaHTCache {
public final class Entry { public final class Entry {
// the class objects // the class objects
public Date initDate; // the date when the request happened; will be used as a key public Date initDate; // the date when the request happened; will be used as a key
public int depth; // the depth of prefetching public int depth; // the depth of prefetching
public httpHeader requestHeader; // we carry also the header to prevent too many file system access public httpHeader requestHeader; // we carry also the header to prevent too many file system access
public String responseStatus; public String responseStatus;
public httpHeader responseHeader; // we carry also the header to prevent too many file system access public httpHeader responseHeader; // we carry also the header to prevent too many file system access
public File cacheFile; // the cache file public File cacheFile; // the cache file
public byte[] cacheArray; // or the cache as byte-array public byte[] cacheArray; // or the cache as byte-array
public URL url; public URL url;
public String name; // the name of the link, read as anchor from an <a>-tag public String name; // the name of the link, read as anchor from an <a>-tag
public String nomalizedURLHash; public String nomalizedURLHash;
public String nomalizedURLString; public String nomalizedURLString;
public int status; // cache load/hit/stale etc status public int status; // cache load/hit/stale etc status
public Date lastModified; public Date lastModified;
public char doctype; public char doctype;
public String language; public String language;
public plasmaCrawlProfile.entry profile; public plasmaCrawlProfile.entry profile;
private String initiator; private String initiator;
public Entry(Date initDate, int depth, URL url, String name,
public Entry(Date initDate, int depth, URL url, String name, httpHeader requestHeader,
httpHeader requestHeader, String responseStatus, httpHeader responseHeader,
String responseStatus, httpHeader responseHeader, String initiator,
String initiator, plasmaCrawlProfile.entry profile) {
plasmaCrawlProfile.entry profile) {
// normalize url - Borg-0300
// normalize url serverLog.logDebug("PLASMA", "Entry: URL=" + url.toString());
this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url); this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url);
try { try {
this.url = new URL(nomalizedURLString); this.url = new URL(nomalizedURLString);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
System.out.println("internal error at httpdProxyCache.Entry: " + e); System.out.println("internal error at httpdProxyCache.Entry: " + e);
System.exit(-1); System.exit(-1);
}
this.name = name;
this.cacheFile = getCachePath(this.url);
this.nomalizedURLHash = plasmaCrawlLURL.urlHash(nomalizedURLString);
// assigned:
this.initDate = initDate;
this.depth = depth;
this.requestHeader = requestHeader;
this.responseStatus = responseStatus;
this.responseHeader = responseHeader;
this.profile = profile;
this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null: initiator);
// calculated:
if (responseHeader == null) {
try {
throw new RuntimeException("RESPONSE HEADER = NULL");
} catch (Exception e) {
System.out.println("RESPONSE HEADER = NULL in " + url);
e.printStackTrace();
System.exit(0);
}
lastModified = serverDate.correctedGMTDate();
} else {
lastModified = responseHeader.lastModified();
if (lastModified == null) lastModified = serverDate.correctedGMTDate(); // does not exist in header
}
this.doctype = plasmaWordIndexEntry.docType(responseHeader.mime());
if (this.doctype == plasmaWordIndexEntry.DT_UNKNOWN) this.doctype = plasmaWordIndexEntry.docType(url);
this.language = plasmaWordIndexEntry.language(url);
// to be defined later:
this.cacheArray = null;
}
public String name() {
return name;
}
public String initiator() {
return initiator;
} }
public boolean proxy() { this.name = name;
return initiator() == null; this.cacheFile = getCachePath(this.url);
this.nomalizedURLHash = plasmaCrawlLURL.urlHash(nomalizedURLString);
// assigned:
this.initDate = initDate;
this.depth = depth;
this.requestHeader = requestHeader;
this.responseStatus = responseStatus;
this.responseHeader = responseHeader;
this.profile = profile;
this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null: initiator);
// calculated:
if (responseHeader == null) {
try {
throw new RuntimeException("RESPONSE HEADER = NULL");
} catch (Exception e) {
System.out.println("RESPONSE HEADER = NULL in " + url);
e.printStackTrace();
System.exit(0);
}
lastModified = serverDate.correctedGMTDate();
} else {
lastModified = responseHeader.lastModified();
if (lastModified == null) lastModified = serverDate.correctedGMTDate(); // does not exist in header
} }
public long size() { this.doctype = plasmaWordIndexEntry.docType(responseHeader.mime());
if (cacheArray == null) return 0; else return cacheArray.length; if (this.doctype == plasmaWordIndexEntry.DT_UNKNOWN) this.doctype = plasmaWordIndexEntry.docType(url);
} this.language = plasmaWordIndexEntry.language(url);
public URL referrerURL() { // to be defined later:
if (requestHeader == null) return null; this.cacheArray = null;
try { }
return new URL((String) requestHeader.get(httpHeader.REFERER, ""));
} catch (Exception e) { public String name() {
return null; return name;
} }
public String initiator() {
return initiator;
}
public boolean proxy() {
return initiator() == null;
}
public long size() {
if (cacheArray == null) return 0; else return cacheArray.length;
}
public URL referrerURL() {
if (requestHeader == null) return null;
try {
return new URL((String) requestHeader.get(httpHeader.REFERER, ""));
} catch (Exception e) {
return null;
} }
}
/* /*
public boolean update() { public boolean update() {
return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD)); return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));
} }
*/ */
// the following three methods for cache read/write granting shall be as loose as possible // the following three methods for cache read/write granting shall be as loose as possible
// but also as strict as necessary to enable caching of most items // but also as strict as necessary to enable caching of most items
public String shallStoreCacheForProxy() { public String shallStoreCacheForProxy() {
// returns NULL if the answer is TRUE // returns NULL if the answer is TRUE
// in case of FALSE, the reason as String is returned // in case of FALSE, the reason as String is returned
plasmaSwitchboard.java
@@ -100,25 +100,25 @@
package de.anomic.plasma; package de.anomic.plasma;
import java.io.BufferedReader; // import java.io.BufferedReader;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; // import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; // import java.io.InputStreamReader;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.Enumeration; // import java.util.Enumeration;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.TreeMap; // import java.util.TreeMap;
import java.util.Vector; // import java.util.Vector;
import de.anomic.data.messageBoard; import de.anomic.data.messageBoard;
import de.anomic.data.wikiBoard; import de.anomic.data.wikiBoard;
@@ -130,24 +130,23 @@ import de.anomic.kelondro.kelondroTables;
import de.anomic.server.serverAbstractSwitch; import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings; import de.anomic.server.serverCodings;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
import de.anomic.server.serverDate; // import de.anomic.server.serverDate;
import de.anomic.server.serverInstantThread; import de.anomic.server.serverInstantThread;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSemaphore; import de.anomic.server.serverSemaphore;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.server.serverFileUtils; // import de.anomic.server.serverFileUtils;
import de.anomic.tools.bitfield; import de.anomic.tools.bitfield;
import de.anomic.tools.crypt; import de.anomic.tools.crypt;
import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB; // import de.anomic.yacy.yacySeedDB;
public final class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch { public final class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch {
// load slots // load slots
public static int crawlSlots = 10; public static int crawlSlots = 10;
public static int indexingSlots = 100; public static int indexingSlots = 100;
@@ -158,7 +157,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public static plasmaURLPattern urlBlacklist; public static plasmaURLPattern urlBlacklist;
// storage management // storage management
private File cachePath; private File cachePath; // do we need that ?
private File plasmaPath; private File plasmaPath;
public File listsPath; public File listsPath;
public plasmaURLPool urlPool; public plasmaURLPool urlPool;
@@ -192,13 +191,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
private static plasmaSwitchboard sb; private static plasmaSwitchboard sb;
public plasmaSwitchboard(String rootPath, String initPath, String configPath) throws IOException { public plasmaSwitchboard(String rootPath, String initPath, String configPath) throws IOException {
super(rootPath, initPath, configPath); super(rootPath, initPath, configPath);
// set loglevel and log // set loglevel and log
setLog(new serverLog("PLASMA")); setLog(new serverLog("PLASMA"));
// load values from configs // load values from configs
plasmaPath = new File(rootPath, getConfig("dbPath", "PLASMADB")); plasmaPath = new File(rootPath, getConfig("dbPath", "PLASMADB"));
listsPath = new File(rootPath, getConfig("listsPath", "LISTS")); listsPath = new File(rootPath, getConfig("listsPath", "LISTS"));
remoteProxyHost = getConfig("remoteProxyHost", ""); remoteProxyHost = getConfig("remoteProxyHost", "");
try { try {
@@ -217,12 +216,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (!(listsPath.exists())) listsPath.mkdirs(); if (!(listsPath.exists())) listsPath.mkdirs();
// load coloured lists // load coloured lists
if (blueList == null) { if (blueList == null) {
// read only once upon first instantiation of this class // read only once upon first instantiation of this class
String f = getConfig("plasmaBlueList", null); String f = getConfig("plasmaBlueList", null);
if (f != null) blueList = kelondroMSetTools.loadList(new File(f)); else blueList= new TreeSet(); if (f != null) blueList = kelondroMSetTools.loadList(new File(f)); else blueList= new TreeSet();
} }
// load the black-list / inspired by [AS] // load the black-list / inspired by [AS]
urlBlacklist = new plasmaURLPattern(new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"))); urlBlacklist = new plasmaURLPattern(new File(getRootPath(), getConfig("listsPath", "DATA/LISTS")));
@@ -238,7 +237,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
stopwords = kelondroMSetTools.loadList(new File(rootPath, "yacy.stopwords")); stopwords = kelondroMSetTools.loadList(new File(rootPath, "yacy.stopwords"));
} }
// read memory amount // read memory amount
int ramLURL = Integer.parseInt(getConfig("ramCacheLURL", "1024")) / 1024; int ramLURL = Integer.parseInt(getConfig("ramCacheLURL", "1024")) / 1024;
int ramNURL = Integer.parseInt(getConfig("ramCacheNURL", "1024")) / 1024; int ramNURL = Integer.parseInt(getConfig("ramCacheNURL", "1024")) / 1024;
int ramEURL = Integer.parseInt(getConfig("ramCacheEURL", "1024")) / 1024; int ramEURL = Integer.parseInt(getConfig("ramCacheEURL", "1024")) / 1024;
@@ -254,7 +253,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logSystem("Message Cache memory = " + ppRamString(ramMessage)); log.logSystem("Message Cache memory = " + ppRamString(ramMessage));
log.logSystem("Wiki Cache memory = " + ppRamString(ramWiki)); log.logSystem("Wiki Cache memory = " + ppRamString(ramWiki));
// make crawl profiles database and default profiles // make crawl profiles database and default profiles
log.logSystem("Initializing Crawl Profiles"); log.logSystem("Initializing Crawl Profiles");
profiles = new plasmaCrawlProfile(new File(plasmaPath, "crawlProfiles0.db")); profiles = new plasmaCrawlProfile(new File(plasmaPath, "crawlProfiles0.db"));
initProfiles(); initProfiles();
@@ -270,7 +269,21 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start a cache manager // start a cache manager
log.logSystem("Starting HT Cache Manager"); log.logSystem("Starting HT Cache Manager");
File htCachePath = new File(getRootPath(), getConfig("proxyCache","HTCACHE"));
// create the Cache directorie - Borg-0300
String cp = getConfig("proxyCache", "DATA/HTCACHE");
cp = cp.replace('\\', '/');
if (cp.endsWith("/")) cp = cp.substring(0,cp.length() - 1);
File htCachePath = new File(cp);
if (!(htCachePath.exists())) htCachePath.mkdirs();
if (!(htCachePath.isDirectory())) {
// if the cache does not exists or is a file and not a directory, panic
serverLog.logSystem("PLASMA", "the cache path " + htCachePath.toString() + " is not a directory or does not exists and cannot be created");
System.exit(0);
} else {
serverLog.logInfo("PLASMA", "proxyCache=" + cp);
}
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP); this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP);
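A sketch of the behavioural difference the commit message refers to ('paths are now absolute'); the old form resolved the configured value against the YaCy root, the new form takes it as given:

    // before: relative to getRootPath()
    File htCachePath = new File(getRootPath(), getConfig("proxyCache", "HTCACHE"));
    // after: an absolute path is used as-is; a relative one resolves against the working directory
    File htCachePath = new File(getConfig("proxyCache", "DATA/HTCACHE"));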
@@ -309,7 +322,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
messageDB = new messageBoard(new File(getRootPath(), "DATA/SETTINGS/message.db"), ramMessage); messageDB = new messageBoard(new File(getRootPath(), "DATA/SETTINGS/message.db"), ramMessage);
log.logSystem("Starting Wiki Board"); log.logSystem("Starting Wiki Board");
wikiDB = new wikiBoard(new File(getRootPath(), "DATA/SETTINGS/wiki.db"), wikiDB = new wikiBoard(new File(getRootPath(), "DATA/SETTINGS/wiki.db"),
new File(getRootPath(), "DATA/SETTINGS/wiki-bkp.db"), ramWiki); new File(getRootPath(), "DATA/SETTINGS/wiki-bkp.db"), ramWiki);
// init cookie-Monitor // init cookie-Monitor
log.logSystem("Starting Cookie Monitor"); log.logSystem("Starting Cookie Monitor");
