* Robots.txt: don't interpret Crawl-Delays for other robots

fixes: http://forum.yacy-websuche.de/viewtopic.php?f=5&t=1647


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5398 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 16 years ago
parent 243e73f53b
commit 0881190b19

@@ -147,15 +147,18 @@ public final class robotsParser {
if (isRule4YaCyAgent) rule4YaCyFound = true;
}
} else if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) {
pos = line.indexOf(" ");
if (pos != -1) {
try {
// the crawl delay can be a float number and means number of seconds
crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim()));
} catch (final NumberFormatException e) {
// invalid crawling delay
}
}
inBlock = true;
if (isRule4YaCyAgent || isRule4AllAgents) {
pos = line.indexOf(" ");
if (pos != -1) {
try {
// the crawl delay can be a float number and means number of seconds
crawlDelayMillis = (long) (1000.0 * Float.parseFloat(line.substring(pos).trim()));
} catch (final NumberFormatException e) {
// invalid crawling delay
}
}
}
} else if (lineUpper.startsWith(ROBOTS_DISALLOW) ||
lineUpper.startsWith(ROBOTS_ALLOW)) {
inBlock = true;

Loading…
Cancel
Save