From bcc623a84320564dcacf2830288713620ebae230 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen
Date: Fri, 12 Jul 2013 16:24:56 +0200
Subject: [PATCH] refactoring of load_delay: this is a matter of client
 identification

---
 htroot/CrawlCheck_p.java                             |  7 +++----
 htroot/Crawler_p.java                                |  3 +--
 htroot/DictionaryLoader_p.java                       | 13 ++++++-------
 htroot/Load_RSS_p.java                               |  3 +--
 htroot/ViewFile.java                                 |  3 +--
 htroot/ViewImage.java                                |  3 +--
 htroot/api/getpageinfo.java                          |  3 +--
 htroot/api/getpageinfo_p.java                        |  3 +--
 htroot/api/webstructure.java                         |  3 +--
 .../yacy/cora/protocol/ClientIdentification.java     |  5 +++++
 source/net/yacy/crawler/data/CrawlQueues.java        |  3 +--
 source/net/yacy/crawler/retrieval/RSSLoader.java     |  3 +--
 source/net/yacy/peers/operation/yacyRelease.java     |  3 +--
 source/net/yacy/search/Switchboard.java              |  4 ++--
 source/net/yacy/search/index/Segment.java            |  3 +--
 15 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/htroot/CrawlCheck_p.java b/htroot/CrawlCheck_p.java
index 12e9b10e3..aaa4247d0 100644
--- a/htroot/CrawlCheck_p.java
+++ b/htroot/CrawlCheck_p.java
@@ -28,7 +28,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.crawler.robots.RobotsTxtEntry;
@@ -88,19 +87,19 @@ public class CrawlCheck_p {
             robotsEntry = sb.robots.getEntry(u, sb.peers.myBotIDs());
             if (robotsEntry == null) {
                 prop.put("table_list_" + row + "_robots", "no robots");
-                prop.put("table_list_" + row + "_crawldelay", CrawlQueues.queuedMinLoadDelay + " ms");
+                prop.put("table_list_" + row + "_crawldelay", ClientIdentification.minLoadDelay() + " ms");
                 prop.put("table_list_" + row + "_sitemap", "");
             } else {
                 robotsAllowed = !robotsEntry.isDisallowed(u);
                 prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
-                prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
+                prop.put("table_list_" + row + "_crawldelay", Math.max(ClientIdentification.minLoadDelay(), robotsEntry.getCrawlDelayMillis()) + " ms");
                 prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
             }
 
             // try to load the url
             if (robotsAllowed) try {
                 Request request = sb.loader.request(u, true, false);
-                final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 if (response == null) {
                     prop.put("table_list_" + row + "_access", "no response");
                 } else {
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 0f7138ffc..97db138a9 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -43,7 +43,6 @@ import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.CrawlSwitchboard;
 import net.yacy.crawler.data.CrawlProfile;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.SitemapImporter;
@@ -288,7 +287,7 @@ public class Crawler_p {
                 // download document
                 Document scraper;
                 try {
-                    scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                    scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     // get links and generate filter
                     for (DigestURI u: scraper.getAnchors().keySet()) {
                         newRootURLs.add(u);
diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java
index 0c555e114..4c3025658 100644
--- a/htroot/DictionaryLoader_p.java
+++ b/htroot/DictionaryLoader_p.java
@@ -27,7 +27,6 @@ import net.yacy.cora.geo.OpenGeoDBLocation;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.LibraryProvider;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -67,7 +66,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
@@ -109,7 +108,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon1Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
@@ -151,7 +150,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon2Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
@@ -193,7 +192,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geo1Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
                 LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
@@ -236,7 +235,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("drw0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
                 LibraryProvider.activateDeReWo();
@@ -280,7 +279,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("pnd0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file());
                 LibraryProvider.activatePND();
diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java
index 67380481a..2e8ab5887 100644
--- a/htroot/Load_RSS_p.java
+++ b/htroot/Load_RSS_p.java
@@ -42,7 +42,6 @@ import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.HarvestProcess;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.RSSLoader;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.data.WorkTables;
@@ -267,7 +266,7 @@ public class Load_RSS_p {
         RSSReader rss = null;
         if (url != null) try {
             prop.put("url", url.toNormalform(true));
-            final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             final byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (final IOException e) {
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 223589113..5482bac56 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -45,7 +45,6 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.crawler.data.Cache;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -169,7 +168,7 @@ public class ViewFile {
 
         Response response = null;
         try {
-            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             prop.put("error", "4");
             prop.put("error_errorText", "error loading resource: " + e.getMessage());
diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java
index a904c2cb2..fa208a445 100644
--- a/htroot/ViewImage.java
+++ b/htroot/ViewImage.java
@@ -39,7 +39,6 @@ import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.storage.ConcurrentARC;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.data.URLLicense;
 import net.yacy.document.ImageParser;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -105,7 +104,7 @@ public class ViewImage {
         if (image == null) {
             byte[] resourceb = null;
             if (url != null) try {
-                resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             } catch (final IOException e) {
                 ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
             }
diff --git a/htroot/api/getpageinfo.java b/htroot/api/getpageinfo.java
index da63daa7d..8a1e79ecc 100644
--- a/htroot/api/getpageinfo.java
+++ b/htroot/api/getpageinfo.java
@@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.robots.RobotsTxtEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.repository.Blacklist.BlacklistType;
@@ -97,7 +96,7 @@ public class getpageinfo {
         }
         net.yacy.document.Document scraper = null;
         if (u != null) try {
-            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
             // bad things are possible, i.e. that the Server responds with "403 Bad Behavior"
diff --git a/htroot/api/getpageinfo_p.java b/htroot/api/getpageinfo_p.java
index d9d7c1281..73c2a4443 100644
--- a/htroot/api/getpageinfo_p.java
+++ b/htroot/api/getpageinfo_p.java
@@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.robots.RobotsTxtEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.repository.Blacklist.BlacklistType;
@@ -97,7 +96,7 @@ public class getpageinfo_p {
         }
         net.yacy.document.Document scraper = null;
         if (u != null) try {
-            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
             // bad things are possible, i.e. that the Server responds with "403 Bad Behavior"
diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java
index 9b7d30467..ba95001d3 100644
--- a/htroot/api/webstructure.java
+++ b/htroot/api/webstructure.java
@@ -35,7 +35,6 @@ import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.kelondro.data.citation.CitationReference;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.rwi.IndexCell;
@@ -98,7 +97,7 @@ public class webstructure {
         prop.put("references", 1);
         net.yacy.document.Document scraper = null;
         if (url != null) try {
-            scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
         }
diff --git a/source/net/yacy/cora/protocol/ClientIdentification.java b/source/net/yacy/cora/protocol/ClientIdentification.java
index 56dcfa480..5695c1be2 100644
--- a/source/net/yacy/cora/protocol/ClientIdentification.java
+++ b/source/net/yacy/cora/protocol/ClientIdentification.java
@@ -27,6 +27,7 @@ package net.yacy.cora.protocol;
 
 public class ClientIdentification {
 
+    public static final long MIN_LOAD_DELAY = 500;
     public static final int DEFAULT_TIMEOUT = 10000;
     public static final int minimumLocalDeltaInit = 10; // the minimum time difference between access of the same local domain
     public static final int minimumGlobalDeltaInit = 500; // the minimum time difference between access of the same global domain
@@ -118,4 +119,8 @@ public class ClientIdentification {
 
         return location;
     }
+
+    public static long minLoadDelay() {
+        return MIN_LOAD_DELAY;
+    }
 }
diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java
index 321cff71c..4ca3ed039 100644
--- a/source/net/yacy/crawler/data/CrawlQueues.java
+++ b/source/net/yacy/crawler/data/CrawlQueues.java
@@ -63,7 +63,6 @@ import net.yacy.search.SwitchboardConstants;
 
 public class CrawlQueues {
 
-    public static final long queuedMinLoadDelay = 500;
     private static final String ERROR_DB_FILENAME = "urlError4.db";
     private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
 
@@ -654,7 +653,7 @@ public class CrawlQueues {
             try {
                 this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
                 final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
-                final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 if (response == null) {
                     this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                     if (CrawlQueues.this.log.isFine()) {
diff --git a/source/net/yacy/crawler/retrieval/RSSLoader.java b/source/net/yacy/crawler/retrieval/RSSLoader.java
index 5a0c65bc9..444315b91 100644
--- a/source/net/yacy/crawler/retrieval/RSSLoader.java
+++ b/source/net/yacy/crawler/retrieval/RSSLoader.java
@@ -45,7 +45,6 @@ import net.yacy.cora.storage.ComparableARC;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.HarvestProcess;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.data.WorkTables;
 import net.yacy.kelondro.blob.Tables;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -71,7 +70,7 @@ public class RSSLoader extends Thread {
     public void run() {
         RSSReader rss = null;
         try {
-            final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             final byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (final MalformedURLException e) {
diff --git a/source/net/yacy/peers/operation/yacyRelease.java b/source/net/yacy/peers/operation/yacyRelease.java
index 18e24f38f..433bd2d45 100644
--- a/source/net/yacy/peers/operation/yacyRelease.java
+++ b/source/net/yacy/peers/operation/yacyRelease.java
@@ -53,7 +53,6 @@ import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.storage.Files;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.document.Document;
 import net.yacy.document.parser.tarParser;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -239,7 +238,7 @@ public final class yacyRelease extends yacyVersion {
         try {
             final DigestURI uri = location.getLocationURL();
             Thread.currentThread().setName("allReleaseFrom - host " + uri.getHost()); // makes it more easy to see which release blocks process in thread dump
-            scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             return null;
         }
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 7cede61f7..7d6ab8372 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2879,7 +2879,7 @@ public final class Switchboard extends serverSwitch {
         // get a scraper to get the title
         Document scraper;
         try {
-            scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (IOException e) {
             return "scraper cannot load URL: " + e.getMessage();
         }
@@ -2986,7 +2986,7 @@ public final class Switchboard extends serverSwitch {
                 String urlName = url.toNormalform(true);
                 Thread.currentThread().setName("Switchboard.addToIndex:" + urlName);
                 try {
-                    final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                    final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     if (response == null) {
                         throw new IOException("response == null");
                     }
diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java
index 806693397..64f4e6019 100644
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@@ -55,7 +55,6 @@ import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.LookAheadIterator;
 import net.yacy.cora.util.SpaceExceededException;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -812,7 +811,7 @@ public class Segment {
         try {
 
             // parse the resource
-            final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT));
+            final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT));
            if (document == null) {
                 // delete just the url entry
                 fulltext().remove(urlhash);
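
Note (not part of the commit): every hunk above swaps the crawler-queue constant CrawlQueues.queuedMinLoadDelay for the new accessor ClientIdentification.minLoadDelay(), so the minimum politeness delay is owned by the class that identifies the requesting client. The sketch below mirrors the accessor this patch adds and then sketches a hypothetical per-agent overload to illustrate the direction the subject line ("this is a matter of client identification") points to; the agent-name check and the doubled fallback delay are assumptions for illustration, not anything this commit ships.

    // MinLoadDelaySketch.java -- illustrative only, not part of the patch.
    public class MinLoadDelaySketch {

        // same value the patch moves out of CrawlQueues.queuedMinLoadDelay
        public static final long MIN_LOAD_DELAY = 500; // milliseconds

        // the accessor as introduced by this patch: one fixed global minimum
        public static long minLoadDelay() {
            return MIN_LOAD_DELAY;
        }

        // hypothetical extension: derive the delay from the client identity;
        // the agent name and the 2x fallback are assumptions for illustration
        public static long minLoadDelay(final String agentName) {
            if ("yacybot".equals(agentName)) return MIN_LOAD_DELAY;
            return MIN_LOAD_DELAY * 2; // unidentified clients wait longer
        }

        public static void main(final String[] args) {
            System.out.println(minLoadDelay());           // 500
            System.out.println(minLoadDelay("yacybot"));  // 500
            System.out.println(minLoadDelay("other"));    // 1000
        }
    }

With an overload like that, call sites such as CrawlQueues could eventually pass their agent identity instead of all sharing one global constant, which is what hiding the value behind a method (rather than a public field) makes possible.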