*) more correct robots.txt validation

- isDisallowed now uses getFile instead of getPath

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1870 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent f046e1814a
commit 734d18f283

@ -67,6 +67,11 @@ import de.anomic.server.logging.serverLog;
* Currently it only parses the Deny part.
* *
* http://www.robotstxt.org/wc/norobots-rfc.html
*
* TODO:
* - If a request attempt results in a temporary failure, a robot
* should defer visits to the site until such time as the resource
* can be retrieved.
*/
public final class robotsParser{
@ -263,7 +268,7 @@ public final class robotsParser{
}
}
if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
if (robotsTxt4Host.isDisallowed(nexturl.getFile())) {
return true;
}
return false;

Loading…
Cancel
Save