Reduced load on Solr: no seed update in Status, and no exists-check in

HTTPLoader in case of redirects; that check can be done using the htcache instead.
pull/1/head
Michael Peter Christen 12 years ago
parent 7ee71c2354
commit 2fd7bbb450

@ -117,7 +117,7 @@ public class Status
} }
// update seed info // update seed info
sb.updateMySeed(); //sb.updateMySeed(); // don't do this here. if Solr is stuck, this makes it worse. And it prevents that we can click on the Thread Dump menu.
final boolean adminaccess = sb.adminAuthenticated(header) >= 2; final boolean adminaccess = sb.adminAuthenticated(header) >= 2;
if ( adminaccess ) { if ( adminaccess ) {

@ -33,7 +33,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.crawler.HarvestProcess; import net.yacy.crawler.data.Cache;
import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.Latency; import net.yacy.crawler.data.Latency;
import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.data.ZURL.FailCategory;
@ -170,12 +170,10 @@ public final class HTTPLoader {
throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown."); throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
} }
// check if the url was already indexed // check if the url was already loaded
@SuppressWarnings("deprecation") if (Cache.has(redirectionUrl.hash())) { // customer request
final HarvestProcess dbname = this.sb.urlExists(ASCII.String(redirectionUrl.hash()));
if (dbname != null) { // customer request
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode); this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname.toString()); throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
} }
// retry crawling with new url // retry crawling with new url

Loading…
Cancel
Save