Reduced load on Solr: no seed update in Status and no exists-check in

HTTPLoader in case of redirects; that check can be done using the htcache.
pull/1/head
Michael Peter Christen 12 years ago
parent 7ee71c2354
commit 2fd7bbb450

@ -117,7 +117,7 @@ public class Status
}
// update seed info
sb.updateMySeed();
//sb.updateMySeed(); // Don't do this here: if Solr is stuck, this makes it worse, and it prevents us from clicking on the Thread Dump menu.
final boolean adminaccess = sb.adminAuthenticated(header) >= 2;
if ( adminaccess ) {

@ -33,7 +33,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.crawler.HarvestProcess;
import net.yacy.crawler.data.Cache;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.Latency;
import net.yacy.crawler.data.ZURL.FailCategory;
@ -170,12 +170,10 @@ public final class HTTPLoader {
throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
}
// check if the url was already indexed
@SuppressWarnings("deprecation")
final HarvestProcess dbname = this.sb.urlExists(ASCII.String(redirectionUrl.hash()));
if (dbname != null) { // customer request
// check if the url was already loaded
if (Cache.has(redirectionUrl.hash())) { // customer request
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname.toString());
throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
}
// retry crawling with new url

Loading…
Cancel
Save