Reduced load on Solr: no seed update in Status, and no exists-check in
HTTPLoader in case of redirects; that check can be done using the htcache instead.
12 years ago · 2fd7bbb450
parent 7ee71c2354
commit 2fd7bbb450
2 changed files with 5 additions and 7 deletions
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -117,7 +117,7 @@ public class Status
        }
        // update seed info
-        sb.updateMySeed();
+        //sb.updateMySeed(); // don't do this here. if Solr is stuck, this makes it worse. And it prevents that we can click on the Thread Dump menu.
        final boolean adminaccess = sb.adminAuthenticated(header) >= 2;
        if ( adminaccess ) {
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -33,7 +33,7 @@ import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.protocol.http.HTTPClient;
-import net.yacy.crawler.HarvestProcess;
+import net.yacy.crawler.data.Cache;
 import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.data.Latency;
 import net.yacy.crawler.data.ZURL.FailCategory;
@@ -170,12 +170,10 @@ public final class HTTPLoader {
                    throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                }
-                // check if the url was already indexed
+                // check if the url was already loaded
-                @SuppressWarnings("deprecation")
+                if (Cache.has(redirectionUrl.hash())) { // customer request
-                final HarvestProcess dbname = this.sb.urlExists(ASCII.String(redirectionUrl.hash()));
-                if (dbname != null) { // customer request
                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
-                    throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in db " + dbname.toString());
+                    throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
                }
                // retry crawling with new url