From 2fd7bbb45016c05889bee51d3b3a7adcdb4f14aa Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 12 Jun 2013 00:14:55 +0200 Subject: [PATCH] reduced load on solr: no seed update in Status, and no exists-check in HTTPLoader in case of redirects; that check can be done using the htcache instead. --- htroot/Status.java | 2 +- source/net/yacy/crawler/retrieval/HTTPLoader.java | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/htroot/Status.java b/htroot/Status.java index a82005b47..aeee28e36 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -117,7 +117,7 @@ public class Status } // update seed info - sb.updateMySeed(); + //sb.updateMySeed(); // don't do this here. if Solr is stuck, this makes it worse. And it prevents that we can click on the Thread Dump menu. final boolean adminaccess = sb.adminAuthenticated(header) >= 2; if ( adminaccess ) { diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index e12e9693e..708ab342b 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -33,7 +33,7 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.http.HTTPClient; -import net.yacy.crawler.HarvestProcess; +import net.yacy.crawler.data.Cache; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.Latency; import net.yacy.crawler.data.ZURL.FailCategory; @@ -170,12 +170,10 @@ public final class HTTPLoader { throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown."); } - // check if the url was already indexed - @SuppressWarnings("deprecation") - final HarvestProcess dbname = this.sb.urlExists(ASCII.String(redirectionUrl.hash())); - if (dbname != null) { // customer request + // check if the url was already loaded + if 
(Cache.has(redirectionUrl.hash())) { // customer request this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode); - throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname.toString()); + throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache"); } // retry crawling with new url