From a99934226e5f70b330ac99ee575a5a7746a27f51 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 16 Nov 2011 13:56:31 +0000
Subject: [PATCH] more logging for debugging of robots.txt

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8046 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/crawler/RobotsTxt.java | 27 ++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java
index 94eaaf391..c689eb79e 100644
--- a/source/de/anomic/crawler/RobotsTxt.java
+++ b/source/de/anomic/crawler/RobotsTxt.java
@@ -378,7 +378,7 @@ public class RobotsTxt {
 }
 }
 } else if (code == 401 || code == 403) {
 accessCompletelyRestricted = true;
- if (log.isDebugEnabled()) log.debug("Access to Robots.txt not allowed on URL '" + robotsURL + "'.");
+ log.info("Access to Robots.txt not allowed on URL '" + robotsURL + "'., redirectionCount = " + redirectionCount); // since this is a strange case we log it all the time
 } else {
 if (log.isDebugEnabled()) log.debug("robots.txt could not be downloaded from URL '" + robotsURL + "'. [" + client.getHttpResponse().getStatusLine() + "].");
@@ -389,4 +389,29 @@ public class RobotsTxt {
 }
 return new Object[]{Boolean.valueOf(accessCompletelyRestricted),robotsTxt,eTag,lastMod};
 }
+
+ public final static void main(final String[] args) throws Exception {
+
+ final String url = "http://www.badelatschen.net/robots.txt";
+ final Object[] o = downloadRobotsTxt(new MultiProtocolURI(url), 0, null);
+ if (o == null) {
+ System.out.println("result: null");
+ } else {
+ System.out.println("not allowed = " + ((Boolean) o[0]).toString());
+ System.out.println("robots = " + ((o[1] == null) ? "null" : UTF8.String((byte[]) o[1])));
+ }
+ System.exit(0);
+/*
+ final HttpClient httpclient = new DefaultHttpClient();
+ try {
+ final HttpGet httpget = new HttpGet(url);
+ final ResponseHandler responseHandler = new BasicResponseHandler();
+ final String responseBody = httpclient.execute(httpget, responseHandler);
+ System.out.println(responseBody);
+ } finally {
+ httpclient.getConnectionManager().shutdown();
+ }
+ */
+ }
+
 }