*) Bugfix for the relative-URL bug in the crawler

See: http://www.yacy-forum.de/viewtopic.php?p=13266#13266

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1130 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 7e670894d9
commit c650b112ea

@@ -362,7 +362,13 @@ public final class plasmaCrawlWorker extends Thread {
// reserve cache entry
plasmaHTCache.Entry htCache = cacheManager.newEntry(requestDate, depth, url, name, requestHeader, res.status, res.responseHeader, initiator, profile);
if (!htCache.cacheFile.getCanonicalPath().startsWith(cacheManager.cachePath.getAbsolutePath())) {
// if the cache file would lie outside the cache directory (invalid file path) then reject the URL
remote.close();
log.logInfo("REJECTED URL " + url.toString() + " because of an invalid file path '" + htCache.cacheFile.getAbsolutePath() + "'.");
return;
}
// request has been placed and result has been returned. work off response
File cacheFile = cacheManager.getCachePath(url);
try {
@@ -386,6 +392,7 @@ public final class plasmaCrawlWorker extends Thread {
// if the response does not have the right file type, reject the file
remote.close();
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString());
return;
}
// enQueue new entry with response header
if (profile != null) {

Loading…
Cancel
Save