fix for robot parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8044 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 13 years ago
parent 017a01714d
commit 458c20ff72

@@ -196,12 +196,17 @@ public class RobotsTxt {
                 }
             } else {
                 final byte[] robotsTxt = (byte[]) result[DOWNLOAD_ROBOTS_TXT];
-                Log.logInfo("RobotsTxt", "robots of " + robotsURL.toNormalform(true, true) + ":\n" + UTF8.String(robotsTxt)); // debug TODO remove
-                final RobotsTxtParser parserResult = new RobotsTxtParser(robotsTxt, thisAgents);
-                ArrayList<String> denyPath = parserResult.denyList();
+                Log.logInfo("RobotsTxt", "robots of " + robotsURL.toNormalform(true, true) + ":\n" + ((robotsTxt == null) ? "null" : UTF8.String(robotsTxt))); // debug TODO remove
+                RobotsTxtParser parserResult;
+                ArrayList<String> denyPath;
                 if (((Boolean) result[DOWNLOAD_ACCESS_RESTRICTED]).booleanValue()) {
+                    parserResult = new RobotsTxtParser(thisAgents);
+                    // create virtual deny path
                     denyPath = new ArrayList<String>();
                     denyPath.add("/");
+                } else {
+                    parserResult = new RobotsTxtParser(thisAgents, robotsTxt);
+                    denyPath = parserResult.denyList();
                 }
                 // store the data into the robots DB

Loading…
Cancel
Save