|
|
@ -58,6 +58,7 @@ import de.anomic.http.httpHeader;
|
|
|
|
import de.anomic.http.httpRemoteProxyConfig;
|
|
|
|
import de.anomic.http.httpRemoteProxyConfig;
|
|
|
|
import de.anomic.http.httpc;
|
|
|
|
import de.anomic.http.httpc;
|
|
|
|
import de.anomic.http.httpdProxyHandler;
|
|
|
|
import de.anomic.http.httpdProxyHandler;
|
|
|
|
|
|
|
|
import de.anomic.plasma.plasmaHTCache.Entry;
|
|
|
|
import de.anomic.server.logging.serverLog;
|
|
|
|
import de.anomic.server.logging.serverLog;
|
|
|
|
import de.anomic.tools.bitfield;
|
|
|
|
import de.anomic.tools.bitfield;
|
|
|
|
import de.anomic.yacy.yacyCore;
|
|
|
|
import de.anomic.yacy.yacyCore;
|
|
|
@ -356,6 +357,13 @@ public final class plasmaCrawlWorker extends Thread {
|
|
|
|
// TODO: abort the download if the content is too long ...
|
|
|
|
// TODO: abort the download if the content is too long ...
|
|
|
|
//long contentLength = res.responseHeader.contentLength();
|
|
|
|
//long contentLength = res.responseHeader.contentLength();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (htCache.cacheFile.getAbsolutePath().length() > Entry.MAXPATHLENGTH) {
|
|
|
|
|
|
|
|
remote.close();
|
|
|
|
|
|
|
|
log.logInfo("REJECTED URL " + url.toString() + " because path too long '" +
|
|
|
|
|
|
|
|
cacheManager.cachePath.getAbsolutePath() + "'");
|
|
|
|
|
|
|
|
return (htCache = null);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// reserve cache entry
|
|
|
|
// reserve cache entry
|
|
|
|
htCache = cacheManager.newEntry(requestDate, depth, url, name, requestHeader, res.status, res.responseHeader, initiator, profile);
|
|
|
|
htCache = cacheManager.newEntry(requestDate, depth, url, name, requestHeader, res.status, res.responseHeader, initiator, profile);
|
|
|
|
if (!htCache.cacheFile.getCanonicalPath().startsWith(cacheManager.cachePath.getCanonicalPath())) {
|
|
|
|
if (!htCache.cacheFile.getCanonicalPath().startsWith(cacheManager.cachePath.getCanonicalPath())) {
|
|
|
@ -364,13 +372,6 @@ public final class plasmaCrawlWorker extends Thread {
|
|
|
|
log.logInfo("REJECTED URL " + url.toString() + " because of an invalid file path ('" +
|
|
|
|
log.logInfo("REJECTED URL " + url.toString() + " because of an invalid file path ('" +
|
|
|
|
htCache.cacheFile.getCanonicalPath() + "' does not start with '" +
|
|
|
|
htCache.cacheFile.getCanonicalPath() + "' does not start with '" +
|
|
|
|
cacheManager.cachePath.getAbsolutePath() + "').");
|
|
|
|
cacheManager.cachePath.getAbsolutePath() + "').");
|
|
|
|
return null;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (htCache.cacheFile.getAbsolutePath().length() > 255) {
|
|
|
|
|
|
|
|
remote.close();
|
|
|
|
|
|
|
|
log.logInfo("REJECTED URL " + url.toString() + " because path too long '" +
|
|
|
|
|
|
|
|
cacheManager.cachePath.getAbsolutePath() + "'");
|
|
|
|
|
|
|
|
return (htCache = null);
|
|
|
|
return (htCache = null);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|