From 2cd695f3768e48740de7de3b1b0f82a96c0ec8a6 Mon Sep 17 00:00:00 2001
From: theli
Date: Wed, 7 Sep 2005 11:49:53 +0000
Subject: [PATCH] *) Bugfix: path entries of robots.txt were not decoded correctly

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@676 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/data/robotsParser.java | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/source/de/anomic/data/robotsParser.java b/source/de/anomic/data/robotsParser.java
index dcfeff2a8..2b78945fe 100644
--- a/source/de/anomic/data/robotsParser.java
+++ b/source/de/anomic/data/robotsParser.java
@@ -47,6 +47,8 @@ package de.anomic.data;
 import java.lang.String;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashSet;
@@ -128,8 +130,11 @@ public final class robotsParser{
                 // getting the path
                 String path = line.substring(pos).trim();
                 
+                // decoding all special chars
+                path = URLDecoder.decode(path,"UTF-8");
+                
                 // escaping all occurences of ; because this char is used as special char in the Robots DB
-                path = path.replaceAll(";","%3B");                    
+                path = path.replaceAll(";","%3B");
                 
                 // adding it to the pathlist
                 deny.add(path);
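
Illustrative sketch (not part of the patch): the hunk above first percent-decodes the raw path value taken from a robots.txt Disallow line and then re-escapes ';' with "%3B", because that character is used as a separator in the Robots DB. A minimal standalone Java sketch of this decode-then-escape step, using a hypothetical helper name normalizePath (the real parser does this inline in robotsParser.java):

    import java.io.UnsupportedEncodingException;
    import java.net.URLDecoder;

    public class RobotsPathDemo {

        // Percent-decode a Disallow path from robots.txt, then re-escape ';'
        // as "%3B" so it cannot collide with the Robots DB field separator.
        static String normalizePath(String rawPath) throws UnsupportedEncodingException {
            String path = URLDecoder.decode(rawPath.trim(), "UTF-8");
            return path.replaceAll(";", "%3B");
        }

        public static void main(String[] args) throws UnsupportedEncodingException {
            // "%7E" decodes to "~"; the literal ';' is re-escaped for storage.
            System.out.println(normalizePath("/%7Etheli/private;area/"));
            // prints: /~theli/private%3Barea/
        }
    }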