Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 12 years ago
commit 7a5574cd51

@@ -136,8 +136,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
if (hash == null) return false;
//System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " remove " + hash);
try {
-this.urlIndex.delete(hash);
-return true;
+return this.urlIndex.delete(hash);
} catch (final IOException e) {
return false;
}

@@ -170,12 +170,6 @@ public final class HTTPLoader {
throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
}
-// check if the url was already loaded
-if (Cache.has(redirectionUrl.hash())) { // customer request
-this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
-throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
-}
// retry crawling with new url
request.redirectURL(redirectionUrl);
return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);

@@ -2901,6 +2901,9 @@ public final class Switchboard extends serverSwitch {
return "problem crawling an ftp site: " + e.getMessage();
}
}
+// remove the document from the error-db
+this.crawlQueues.urlRemove(urlhash);
// get a scraper to get the title
Document scraper;

Loading…
Cancel
Save