|
|
@ -108,7 +108,8 @@ public final class robotsParser {
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
try {
|
|
|
|
while ((line = reader.readLine()) != null) {
|
|
|
|
while ((line = reader.readLine()) != null) {
|
|
|
|
line = line.trim();
|
|
|
|
// replacing all tabs with spaces
|
|
|
|
|
|
|
|
line = line.replaceAll("\t"," ").replaceAll(":"," ").trim();
|
|
|
|
lineUpper = line.toUpperCase();
|
|
|
|
lineUpper = line.toUpperCase();
|
|
|
|
|
|
|
|
|
|
|
|
if (line.length() == 0) {
|
|
|
|
if (line.length() == 0) {
|
|
|
@ -137,9 +138,6 @@ public final class robotsParser {
|
|
|
|
pos = line.indexOf(ROBOTS_COMMENT);
|
|
|
|
pos = line.indexOf(ROBOTS_COMMENT);
|
|
|
|
if (pos != -1) line = line.substring(0,pos).trim();
|
|
|
|
if (pos != -1) line = line.substring(0,pos).trim();
|
|
|
|
|
|
|
|
|
|
|
|
// replacing all tabs with spaces
|
|
|
|
|
|
|
|
line = line.replaceAll("\t"," ").replaceAll(":"," ");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// getting out the robots name
|
|
|
|
// getting out the robots name
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
if (pos != -1) {
|
|
|
|
if (pos != -1) {
|
|
|
@ -149,9 +147,6 @@ public final class robotsParser {
|
|
|
|
if (isRule4YaCyAgent) rule4YaCyFound = true;
|
|
|
|
if (isRule4YaCyAgent) rule4YaCyFound = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) {
|
|
|
|
} else if (lineUpper.startsWith(ROBOTS_CRAWL_DELAY)) {
|
|
|
|
// replacing all tabs with spaces
|
|
|
|
|
|
|
|
line = line.replaceAll("\t"," ").replaceAll(":"," ");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
if (pos != -1) {
|
|
|
|
if (pos != -1) {
|
|
|
|
try {
|
|
|
|
try {
|
|
|
@ -174,9 +169,6 @@ public final class robotsParser {
|
|
|
|
// cutting of tailing *
|
|
|
|
// cutting of tailing *
|
|
|
|
if (line.endsWith("*")) line = line.substring(0,line.length()-1);
|
|
|
|
if (line.endsWith("*")) line = line.substring(0,line.length()-1);
|
|
|
|
|
|
|
|
|
|
|
|
// replacing all tabs with spaces
|
|
|
|
|
|
|
|
line = line.replaceAll("\t"," ").replaceAll(":"," ");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// getting the path
|
|
|
|
// getting the path
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
pos = line.indexOf(" ");
|
|
|
|
if (pos != -1) {
|
|
|
|
if (pos != -1) {
|
|
|
|