*) Retry crawling without requesting content encoding if the server's Content-Encoding header was incorrect (e.g. the response body is not valid gzip data)

See: http://www.yacy-forum.de/viewtopic.php?p=26917#26917

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2811 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 18 years ago
parent 52466067d8
commit a5b9b514c1

@ -409,10 +409,14 @@ public final class CrawlWorker extends AbstractCrawlWorker {
"'. Retrying request."); "'. Retrying request.");
failreason = plasmaCrawlEURL.DENIED_CONNECTION_BIND_EXCEPTION; failreason = plasmaCrawlEURL.DENIED_CONNECTION_BIND_EXCEPTION;
retryCrawling = true; retryCrawling = true;
} else if ((errorMsg != null) && (errorMsg.indexOf("Corrupt GZIP trailer") >= 0)) { } else if ((errorMsg != null) && (
(errorMsg.indexOf("Corrupt GZIP trailer") >= 0) ||
(errorMsg.indexOf("Not in GZIP format") >= 0)
)) {
this.log.logWarning("CRAWLER Problems detected while receiving gzip encoded content from '" + this.url.toString() + this.log.logWarning("CRAWLER Problems detected while receiving gzip encoded content from '" + this.url.toString() +
"'. Retrying request without using gzip content encoding."); "'. Retrying request without using gzip content encoding.");
failreason = plasmaCrawlEURL.DENIED_CONTENT_DECODING_ERROR; failreason = plasmaCrawlEURL.DENIED_CONTENT_DECODING_ERROR;
this.acceptEncoding = null;
retryCrawling = true; retryCrawling = true;
} else if ((errorMsg != null) && (errorMsg.indexOf("Read timed out") >= 0)) { } else if ((errorMsg != null) && (errorMsg.indexOf("Read timed out") >= 0)) {
this.log.logWarning("CRAWLER Read timeout while receiving content from '" + this.url.toString() + this.log.logWarning("CRAWLER Read timeout while receiving content from '" + this.url.toString() +

Loading…
Cancel
Save