*) More tolerant robots parser

- converting tabs to spaces
   - cutting off a trailing '*' in the disallow section

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1056 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 79818a320f
commit 9649d08171

@ -121,9 +121,12 @@ public final class robotsParser{
// cutting off comments at the line end
pos = line.indexOf("#");
if (pos != -1) {
line = line.substring(0,pos);
line = line.substring(0,pos).trim();
}
// replacing all tabs with spaces
line = line.replaceAll("\t"," ");
// getting out the robots name
pos = line.indexOf(" ");
if (pos != -1) {
@ -138,9 +141,18 @@ public final class robotsParser{
// cutting off comments at the line end
pos = line.indexOf("#");
if (pos != -1) {
line = line.substring(0,pos);
line = line.substring(0,pos).trim();
}
// cutting off trailing *
if (line.endsWith("*")) {
line = line.substring(0,line.length()-1);
}
// replacing all tabs with spaces
line = line.replaceAll("\t"," ");
// getting the path
pos = line.indexOf(" ");
if (pos != -1) {
// getting the path

Loading…
Cancel
Save