From c650b112eaa5c728f667d91566033c612f8a15cf Mon Sep 17 00:00:00 2001 From: theli Date: Sun, 27 Nov 2005 06:35:23 +0000 Subject: [PATCH] *) Bugfix for relative URL Bug in Crawler See: http://www.yacy-forum.de/viewtopic.php?p=13266#13266 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1130 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaCrawlWorker.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index 14670d236..ccb1421cb 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -362,7 +362,13 @@ public final class plasmaCrawlWorker extends Thread { // reserve cache entry plasmaHTCache.Entry htCache = cacheManager.newEntry(requestDate, depth, url, name, requestHeader, res.status, res.responseHeader, initiator, profile); - + if (!htCache.cacheFile.getCanonicalPath().startsWith(cacheManager.cachePath.getAbsolutePath())) { + // if the response has not the right file type then reject file + remote.close(); + log.logInfo("REJECTED URL " + url.toString() + " because of an invalid file path '" + htCache.cacheFile.getAbsolutePath() + "'."); + return; + } + // request has been placed and result has been returned. work off response File cacheFile = cacheManager.getCachePath(url); try { @@ -386,6 +392,7 @@ public final class plasmaCrawlWorker extends Thread { // if the response has not the right file type then reject file remote.close(); log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString()); + return; } // enQueue new entry with response header if (profile != null) {