|
|
@ -163,10 +163,12 @@ public final class robotsParser{
|
|
|
|
|
|
|
|
|
|
|
|
// generating the hostname:port string needed to do a DB lookup
|
|
|
|
// generating the hostname:port string needed to do a DB lookup
|
|
|
|
String urlHostPort = nexturl.getHost() + ":" + ((nexturl.getPort()==-1)?80:nexturl.getPort());
|
|
|
|
String urlHostPort = nexturl.getHost() + ":" + ((nexturl.getPort()==-1)?80:nexturl.getPort());
|
|
|
|
urlHostPort = urlHostPort.toLowerCase();
|
|
|
|
urlHostPort = urlHostPort.toLowerCase().intern();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plasmaCrawlRobotsTxt.Entry robotsTxt4Host = null;
|
|
|
|
|
|
|
|
synchronized(urlHostPort) {
|
|
|
|
// doing a DB lookup to determine if the robots data is already available
|
|
|
|
// doing a DB lookup to determine if the robots data is already available
|
|
|
|
plasmaCrawlRobotsTxt.Entry robotsTxt4Host = plasmaSwitchboard.robots.getEntry(urlHostPort);
|
|
|
|
robotsTxt4Host = plasmaSwitchboard.robots.getEntry(urlHostPort);
|
|
|
|
|
|
|
|
|
|
|
|
// if we have not found any data or the data is older than 7 days, we need to load it from the remote server
|
|
|
|
// if we have not found any data or the data is older than 7 days, we need to load it from the remote server
|
|
|
|
if (
|
|
|
|
if (
|
|
|
@ -223,6 +225,7 @@ public final class robotsParser{
|
|
|
|
robotsTxt4Host = plasmaSwitchboard.robots.addEntry(urlHostPort,denyPath,new Date(),modDate,eTag);
|
|
|
|
robotsTxt4Host = plasmaSwitchboard.robots.addEntry(urlHostPort,denyPath,new Date(),modDate,eTag);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
|
|
|
|
if (robotsTxt4Host.isDisallowed(nexturl.getPath())) {
|
|
|
|
return true;
|
|
|
|
return true;
|
|
|
|