diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java
index 91e7444f9..94eaaf391 100644
--- a/source/de/anomic/crawler/RobotsTxt.java
+++ b/source/de/anomic/crawler/RobotsTxt.java
@@ -196,12 +196,17 @@ public class RobotsTxt {
                 }
             } else {
                 final byte[] robotsTxt = (byte[]) result[DOWNLOAD_ROBOTS_TXT];
-                Log.logInfo("RobotsTxt", "robots of " + robotsURL.toNormalform(true, true) + ":\n" + UTF8.String(robotsTxt)); // debug TODO remove
-                final RobotsTxtParser parserResult = new RobotsTxtParser(robotsTxt, thisAgents);
-                ArrayList denyPath = parserResult.denyList();
+                Log.logInfo("RobotsTxt", "robots of " + robotsURL.toNormalform(true, true) + ":\n" + ((robotsTxt == null) ? "null" : UTF8.String(robotsTxt))); // debug TODO remove
+                RobotsTxtParser parserResult;
+                ArrayList denyPath;
                 if (((Boolean) result[DOWNLOAD_ACCESS_RESTRICTED]).booleanValue()) {
+                    parserResult = new RobotsTxtParser(thisAgents);
+                    // create virtual deny path
                     denyPath = new ArrayList();
                     denyPath.add("/");
+                } else {
+                    parserResult = new RobotsTxtParser(thisAgents, robotsTxt);
+                    denyPath = parserResult.denyList();
                 }
 
                 // store the data into the robots DB