HTCache extended

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2015 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
borg-0300 19 years ago
parent d0dd8b14d2
commit 30e4fc39a5

@ -102,22 +102,43 @@ public final class plasmaHTCache {
this.cachePath = htCachePath; this.cachePath = htCachePath;
// reset old HTCache ? // reset old HTCache ?
final String[] list = cachePath.list(); String[] list = this.cachePath.list();
if (list != null) { if (list != null) {
File object; File object;
for (int i = list.length - 1; i >= 0; i--) { for (int i = list.length - 1; i >= 0; i--) {
object = new File(cachePath, list[i]); object = new File(this.cachePath, list[i]);
if (object.isDirectory()) {
if (!object.getName().equals("http") && if (!object.isDirectory()) { continue; }
!object.getName().equals("yacy") &&
!object.getName().equals("https") && if (!object.getName().equals("http") &&
!object.getName().equals("ftp")) { !object.getName().equals("yacy") &&
deleteOldHTCache(cachePath); !object.getName().equals("https") &&
break; !object.getName().equals("ftp")) {
} deleteOldHTCache(this.cachePath);
break;
} }
} }
} }
File testpath = new File(this.cachePath, "/http/");
list = testpath.list();
if (list != null) {
File object;
for (int i = list.length - 1; i >= 0; i--) {
object = new File(testpath, list[i]);
if (!object.isDirectory()) { continue; }
if (!object.getName().equals("ip") &&
!object.getName().equals("other") &&
!object.getName().equals("www")) {
deleteOldHTCache(this.cachePath);
break;
}
}
}
testpath = null;
// set/make cache path // set/make cache path
if (!htCachePath.exists()) { if (!htCachePath.exists()) {
@ -324,8 +345,10 @@ public final class plasmaHTCache {
private void cleanup() { private void cleanup() {
// clean up cache to have 4% (enough) space for next entries // clean up cache to have 4% (enough) space for next entries
if (this.cacheAge.size() > 0 && this.curCacheSize >= this.maxCacheSize) { if (this.cacheAge.size() > 0 &&
if (this.maxCacheSize > 0) { cleanupDoIt(this.maxCacheSize - (this.maxCacheSize / 100) * 4); } this.curCacheSize >= this.maxCacheSize &&
this.maxCacheSize > 0) {
cleanupDoIt(this.maxCacheSize - (this.maxCacheSize / 100) * 4);
} }
} }
@ -480,17 +503,25 @@ public final class plasmaHTCache {
* that path will be generated * that path will be generated
* @return new File * @return new File
*/ */
public File getCachePath(URL url) { public File getCachePath(final URL url) {
// this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString()); // this.log.logFinest("plasmaHTCache: getCachePath: IN=" + url.toString());
// peer.yacy || www.peer.yacy = http/yacy/peer
// protocol://www.domain.net = protocol/www/domain.net
// protocol://other.domain.net = protocol/other/other.domain.net
// protocol://xxx.xxx.xxx.xxx = protocol/ip/xxx.xxx.xxx.xxx
String host = url.getHost().toLowerCase();
String path = url.getPath(); String path = url.getPath();
String query = url.getQuery(); final String query = url.getQuery();
if (!path.startsWith("/")) { path = "/" + path; } if (!path.startsWith("/")) { path = "/" + path; }
if (path.endsWith("/") && query == null) { path = path + "ndx"; } if (path.endsWith("/") && query == null) { path = path + "ndx"; }
// yes this is not reversible, but that is not needed // yes this is not reversible, but that is not needed
path = replaceRegex(path, "/\\.\\./", "/!!/"); path = replaceRegex(path, "/\\.\\./", "/!!/");
path = replaceRegex(path, "(\"|\\\\|\\*|\\?|:|<|>|\\|)", "_"); // hier wird kein '/' gefiltert path = replaceRegex(path, "(\"|\\\\|\\*|\\?|:|<|>|\\|+)", "_"); // hier wird kein '/' gefiltert
path = path.concat(replaceRegex(query, "(\"|\\\\|\\*|\\?|/|:|<|>|\\|)", "_")); path = path.concat(replaceRegex(query, "(\"|\\\\|\\*|\\?|/|:|<|>|\\|+)", "_"));
// only set NO default ports // only set NO default ports
int port = url.getPort(); int port = url.getPort();
@ -502,13 +533,23 @@ public final class plasmaHTCache {
port = -1; port = -1;
} }
} }
if (url.getHost().toLowerCase().endsWith(".yacy")) { if (host.endsWith(".yacy")) {
host = host.substring(0, host.length() - 5);
if (host.startsWith("www.")) {
host = host.substring(4);
}
protocol = "yacy"; protocol = "yacy";
} else if (host.startsWith("www.")) {
host = "www/" + host.substring(4);
} else if (host.matches("\\d{2,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}")) {
host = "ip/" + host;
} else {
host = "other/" + host;
} }
if (port < 0) { if (port < 0) {
return new File(this.cachePath, protocol + "/" + url.getHost() + path); return new File(this.cachePath, protocol + "/" + host + path);
} else { } else {
return new File(this.cachePath, protocol + "/" + url.getHost() + "!" + port + path); return new File(this.cachePath, protocol + "/" + host + "!" + port + path);
} }
} }
@ -516,51 +557,86 @@ public final class plasmaHTCache {
* this is the reverse function to getCachePath: it constructs the url as string * this is the reverse function to getCachePath: it constructs the url as string
* from a given storage path * from a given storage path
*/ */
public static URL getURL(File cachePath, File f) { public static URL getURL(final File cachePath, final File f) {
// this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]"); // this.log.logFinest("plasmaHTCache: getURL: IN: Path=[" + cachePath + "] File=[" + f + "]");
final String c = cachePath.toString().replace('\\', '/'); final String c = cachePath.toString().replace('\\', '/');
String s = f.toString().replace('\\', '/'); String path = f.toString().replace('\\', '/');
if (s.endsWith("ndx")) { s = s.substring(0, s.length() - 3); } if (path.endsWith("ndx")) { path = path.substring(0, path.length() - 3); }
int pos = s.lastIndexOf(c); int pos = path.lastIndexOf(c);
if (pos == 0) { if (pos == 0) {
s = s.substring(pos + c.length()); path = path.substring(pos + c.length());
while (s.startsWith("/")) { s = s.substring(1); } while (path.startsWith("/")) { path = path.substring(1); }
String protocol = ""; pos = path.indexOf("!");
if (s.startsWith("http/")) { if (pos >= 0) {
protocol = "http://"; path = path.substring(0, pos) + ":" + path.substring(pos + 1);
s = s.substring(5);
} else if (s.startsWith("https/")) {
protocol = "https://";
s = s.substring(6);
} else if (s.startsWith("ftp/")) {
protocol = "ftp://";
s = s.substring(4);
} else if (s.startsWith("yacy/")) {
protocol = "http://";
s = s.substring(5);
} else {
return null;
} }
Pattern pathPattern = Pattern.compile("/!!/"); String protocol = "http://";
Matcher matcher = pathPattern.matcher(s); String host = "";
while (matcher.find()) { if (path.startsWith("yacy/")) {
s = matcher.replaceAll("/\\.\\./"); path = path.substring(5);
matcher.reset(s);
pos = path.indexOf("/");
if (pos > 0) {
host = path.substring(0, pos);
path = path.substring(pos);
} else {
host = path;
path = "";
}
pos = host.indexOf(":");
if (pos > 0) {
host = host.substring(0, pos) + ".yacy" + host.substring(pos);
} else {
host = host + ".yacy";
}
} else {
if (path.startsWith("http/")) {
path = path.substring(5);
} else if (path.startsWith("https/")) {
protocol = "https://";
path = path.substring(6);
} else if (path.startsWith("ftp/")) {
protocol = "ftp://";
path = path.substring(4);
} else {
return null;
}
if (path.startsWith("www/")) {
path = path.substring(4);
host = "www.";
} else if (path.startsWith("other/")) {
path = path.substring(6);
} else if (path.startsWith("ip/")) {
path = path.substring(3);
}
pos = path.indexOf("/");
if (pos > 0) {
host = host + path.substring(0, pos);
path = path.substring(pos);
} else {
host = host + path;
path = "";
}
} }
pos = s.indexOf("!"); if (!path.equals("")) {
if (pos >= 0) { final Pattern pathPattern = Pattern.compile("/!!/");
s = s.substring(0, pos) + ":" + s.substring(pos + 1); final Matcher matcher = pathPattern.matcher(path);
while (matcher.find()) {
path = matcher.replaceAll("/\\.\\./");
matcher.reset(path);
}
} }
// this.log.logFinest("plasmaHTCache: getURL: OUT=" + s); // this.log.logFinest("plasmaHTCache: getURL: OUT=" + s);
try { try {
return new URL(protocol + s); return new URL(protocol + host + path);
} catch (Exception e) { } catch (final Exception e) {
return null; return null;
} }
} }

Loading…
Cancel
Save