diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java
index 341e1a02d..554878943 100644
--- a/htroot/ConfigNetwork_p.java
+++ b/htroot/ConfigNetwork_p.java
@@ -143,7 +143,7 @@ public class ConfigNetwork_p
         prop.put("commit", commit);
 
         // write remote crawl request settings
-        prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? "1" : "0");
+        prop.put("crawlResponse", sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false) ? "1" : "0");
         final long RTCbusySleep = Math
             .max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
@@ -166,7 +166,7 @@ public class ConfigNetwork_p
         prop.put("indexReceiveSearchChecked", indexReceiveSearch);
 
         // set seed information directly
-        sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool("crawlResponse", false));
+        sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
         sb.peers.mySeed().setFlagAcceptRemoteIndex(indexReceive);
 
         // set p2p/robinson mode flags and values
diff --git a/htroot/RemoteCrawl_p.java b/htroot/RemoteCrawl_p.java
index bbdb9f0d7..1117d47cf 100644
--- a/htroot/RemoteCrawl_p.java
+++ b/htroot/RemoteCrawl_p.java
@@ -56,7 +56,7 @@ public class RemoteCrawl_p {
             boolean crawlResponse = post.get("crawlResponse", "off").equals("on");
 
             // read remote crawl request settings
-            sb.setConfig("crawlResponse", crawlResponse);
+            sb.initRemoteCrawler(crawlResponse);
         }
 
         if (post.containsKey("acceptCrawlLimit")) {
@@ -70,7 +70,7 @@ public class RemoteCrawl_p {
         }
 
         // set seed information directly
-        sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool("crawlResponse", false));
+        sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
 
         // write remote crawl request settings
         prop.put("disabled", !sb.peers.mySeed().isActive() && !sb.peers.mySeed().getFlagAcceptRemoteCrawl() ? 1 : 0);
diff --git a/htroot/Status.java b/htroot/Status.java
index 5278c0ded..3e44717fb 100644
--- a/htroot/Status.java
+++ b/htroot/Status.java
@@ -356,7 +356,7 @@ public class Status
         prop.putNum(
             "remoteTriggeredCrawlQueueSize",
-            sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
+            sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) != null ? sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount() : 0);
         prop.put(
             "remoteTriggeredCrawlPaused",
             sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
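Note on the Status.java hunk above and the api/status_p.java hunk below: the null guard is needed because the remote-crawl busy thread is no longer deployed unconditionally, so getThread() may now return null. The guarded expression calls getThread() twice per lookup; a small helper on Switchboard would keep the servlets terse (a sketch only; the method name getThreadJobCount is hypothetical, the getThread accessor is the existing one):

    // sketch: null-safe job count for a possibly undeployed busy thread
    public int getThreadJobCount(final String threadName) {
        final BusyThread t = getThread(threadName); // null when the thread was never deployed
        return t == null ? 0 : t.getJobCount();
    }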
diff --git a/htroot/api/status_p.java b/htroot/api/status_p.java
index a46021f3b..919a4ae12 100644
--- a/htroot/api/status_p.java
+++ b/htroot/api/status_p.java
@@ -105,7 +105,7 @@ public class status_p {
         prop.put("limitCrawlState", STATE_RUNNING);
 
         //remote crawl queue
-        prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
+        prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) != null ? sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount() : 0);
         prop.put("remoteCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
 
         //noload crawl queue
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index cdf5fc0a2..553b6bf6a 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -143,7 +143,7 @@ public final class crawlReceipt {
             return prop;
         }
 
-        if ("fill".equals(result)) try {
+        if ("fill".equals(result) && sb.crawlQueues.delegatedURL != null) try {
             // put new entry into database
             sb.index.fulltext().putMetadata(entry);
             ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
@@ -159,8 +159,10 @@ public final class crawlReceipt {
             return prop;
         }
 
-        sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
-        sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
+        if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case
+            sb.crawlQueues.delegatedURL.remove(entry.hash());
+            sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
+        }
         //switchboard.noticeURL.remove(receivedUrlhash);
         prop.put("delay", "3600");
         return prop;
diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java
index 02232c845..6fd5b48a9 100644
--- a/htroot/yacy/urls.java
+++ b/htroot/yacy/urls.java
@@ -83,7 +83,7 @@ public class urls {
                 }
 
                 // place url to notice-url db
-                sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());
+                if (sb.crawlQueues.delegatedURL != null) sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());
 
                 // create RSS entry
                 prop.put("item_" + c + "_title", "");
diff --git a/source/net/yacy/crawler/Balancer.java b/source/net/yacy/crawler/Balancer.java
index 35b70aecb..164a26c79 100644
--- a/source/net/yacy/crawler/Balancer.java
+++ b/source/net/yacy/crawler/Balancer.java
@@ -21,6 +21,7 @@
 
 package net.yacy.crawler;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
@@ -90,6 +91,9 @@ public interface Balancer {
      */
     public int size();
 
+    public int getOnDemandLimit();
+
+    public boolean getExceed134217727();
     /**
      * check if stack is empty
      * @return true iff size() == 0
diff --git a/source/net/yacy/crawler/HostBalancer.java b/source/net/yacy/crawler/HostBalancer.java
index f3421ddb6..cbd65141e 100644
--- a/source/net/yacy/crawler/HostBalancer.java
+++ b/source/net/yacy/crawler/HostBalancer.java
@@ -202,7 +202,16 @@ public class HostBalancer implements Balancer {
         }
         return true;
     }
-
+
+    @Override
+    public int getOnDemandLimit() {
+        return this.onDemandLimit;
+    }
+
+    @Override
+    public boolean getExceed134217727() {
+        return this.exceed134217727;
+    }
     /**
      * push a request to one of the host queues. If the queue does not exist, it is created
      * @param entry
diff --git a/source/net/yacy/crawler/HostQueue.java b/source/net/yacy/crawler/HostQueue.java
index d106d91e3..6157efb80 100644
--- a/source/net/yacy/crawler/HostQueue.java
+++ b/source/net/yacy/crawler/HostQueue.java
@@ -544,4 +544,14 @@ public class HostQueue implements Balancer {
         return cel;
     }
 
+    @Override
+    public int getOnDemandLimit() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public boolean getExceed134217727() {
+        return this.exceed134217727;
+    }
+
 }
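Note: getOnDemandLimit() and getExceed134217727() are added to the Balancer interface above, which forces every implementation to provide them; HostQueue (above) and LegacyBalancer (below) can only answer with throwing stubs. If the project targets Java 8 or later, default methods would confine the stubs to the interface itself (a sketch under that assumption, not part of the patch):

    public interface Balancer {
        // ... existing methods unchanged ...
        default int getOnDemandLimit() {
            throw new UnsupportedOperationException("getOnDemandLimit not supported by " + getClass().getName());
        }
        default boolean getExceed134217727() {
            throw new UnsupportedOperationException("getExceed134217727 not supported by " + getClass().getName());
        }
    }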
diff --git a/source/net/yacy/crawler/LegacyBalancer.java b/source/net/yacy/crawler/LegacyBalancer.java
index 42fab99c4..c679e859c 100644
--- a/source/net/yacy/crawler/LegacyBalancer.java
+++ b/source/net/yacy/crawler/LegacyBalancer.java
@@ -76,6 +76,16 @@ public class LegacyBalancer implements Balancer {
     private final List<Map.Entry<String, byte[]>> zeroWaitingCandidates;
     private final Random random; // used to alternate between choose-from-maxstack or choose from any zero-waiting
 
+    @Override
+    public int getOnDemandLimit() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public boolean getExceed134217727() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
     private static class HostHandles {
         public String hosthash;
         public HandleSet handleSet;
diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java
index 68b36c013..454ba15db 100644
--- a/source/net/yacy/crawler/data/CrawlQueues.java
+++ b/source/net/yacy/crawler/data/CrawlQueues.java
@@ -72,7 +72,7 @@ public class CrawlQueues {
     private final Switchboard sb;
     private final Loader[] worker;
     private final ArrayBlockingQueue<Request> workerQueue;
-    private final ArrayList<String> remoteCrawlProviderHashes;
+    private ArrayList<String> remoteCrawlProviderHashes;
 
     public NoticedURL noticeURL;
     public ErrorCache errorURL;
@@ -83,7 +83,7 @@ public class CrawlQueues {
         final int maxWorkers = (int) sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10);
         this.worker = new Loader[maxWorkers];
         this.workerQueue = new ArrayBlockingQueue<Request>(200);
-        this.remoteCrawlProviderHashes = new ArrayList<String>();
+        this.remoteCrawlProviderHashes = null;
 
         // start crawling management
         log.config("Starting Crawling Management");
@@ -92,10 +92,16 @@ public class CrawlQueues {
         log.config("Opening errorURL..");
         this.errorURL = new ErrorCache(sb.index.fulltext());
         log.config("Opening delegatedURL..");
-        this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
-        log.config("Finishted Startup of Crawling Management");
+        this.delegatedURL = null;
+    }
+
+    public void initRemoteCrawlQueues() {
+        if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList<String>();
+        if (this.delegatedURL == null) {
+            this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
+            log.config("Finished Startup of Crawling Management");
+        }
     }
-
     /**
     * Relocation is necessary if the user switches the network.
     * Because this object is part of the scheduler we cannot simply close that object and create a new one.
@@ -106,10 +112,10 @@ public class CrawlQueues {
        // removed pending requests
        this.workerQueue.clear();
        this.errorURL.clearCache();
-       if (this.remoteCrawlProviderHashes != null) this.remoteCrawlProviderHashes.clear();
-       this.remoteCrawlProviderHashes.clear();
+       if (this.remoteCrawlProviderHashes != null) this.remoteCrawlProviderHashes.clear();
        this.noticeURL.close();
        this.noticeURL = new NoticedURL(newQueuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), this.sb.exceed134217727);
-       this.delegatedURL.clear();
+       if (this.delegatedURL != null) this.delegatedURL.clear();
    }

    public synchronized void close() {
@@ -130,16 +136,16 @@ public class CrawlQueues {
            }
        }
        this.noticeURL.close();
-       this.delegatedURL.clear();
+       if (this.delegatedURL != null) this.delegatedURL.clear();
    }

    public void clear() {
        // wait for all workers to finish
        this.workerQueue.clear();
        for (final Loader w: this.worker) if (w != null) w.interrupt();
-       this.remoteCrawlProviderHashes.clear();
+       if (this.remoteCrawlProviderHashes != null) this.remoteCrawlProviderHashes.clear();
        this.noticeURL.clear();
-       this.delegatedURL.clear();
+       if (this.delegatedURL != null) this.delegatedURL.clear();
    }

    /**
@@ -148,7 +154,7 @@ public class CrawlQueues {
     * @return if the hash exists, the name of the database is returned, otherwise null is returned
     */
    public HarvestProcess exists(final byte[] hash) {
-       if (this.delegatedURL.containsKey(ASCII.String(hash))) {
+       if (this.delegatedURL != null && this.delegatedURL.containsKey(ASCII.String(hash))) {
            return HarvestProcess.DELEGATED;
        }
        //if (this.noticeURL.existsInStack(hash)) {
@@ -181,7 +187,7 @@ public class CrawlQueues {
    public void removeURL(final byte[] hash) {
        assert hash != null && hash.length == 12;
        this.noticeURL.removeByURLHash(hash);
-       this.delegatedURL.remove(hash);
+       if (this.delegatedURL != null) this.delegatedURL.remove(hash);
    }

    public int removeHosts(final Set<String> hosthashes) {
@@ -194,9 +200,11 @@ public class CrawlQueues {
        if (urlhash == null || urlhash.length == 0) {
            return null;
        }
-       DigestURL u = this.delegatedURL.get(ASCII.String(urlhash));
-       if (u != null) {
-           return u;
+       if (this.delegatedURL != null) {
+           DigestURL u = this.delegatedURL.get(ASCII.String(urlhash));
+           if (u != null) {
+               return u;
+           }
        }
        for (final DigestURL url: activeWorkerEntries().keySet()) {
            if (Base64Order.enhancedCoder.equal(url.hash(), urlhash)) {
@@ -456,7 +464,7 @@ public class CrawlQueues {

        // check if we have an entry in the provider list, otherwise fill the list
        Seed seed;
-       if (this.remoteCrawlProviderHashes.isEmpty()) {
+       if (this.remoteCrawlProviderHashes != null && this.remoteCrawlProviderHashes.isEmpty()) {
            if (this.sb.peers != null && this.sb.peers.sizeConnected() > 0) {
                final Iterator<Seed> e = DHTSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
                while (e.hasNext()) {
@@ -467,14 +475,14 @@ public class CrawlQueues {
                }
            }
        }
-       if (this.remoteCrawlProviderHashes.isEmpty()) {
+       if (this.remoteCrawlProviderHashes == null || this.remoteCrawlProviderHashes.isEmpty()) {
            return false;
        }

        // take one entry from the provider list and load the entries from the remote peer
        seed = null;
        String hash = null;
-       while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
+       while (seed == null && (this.remoteCrawlProviderHashes != null && !this.remoteCrawlProviderHashes.isEmpty())) {
            hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
            if (hash == null) {
                continue;
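Note: delegatedURL is now nullable for the whole lifetime of a peer that never enables remote crawling, so every caller in CrawlQueues, Switchboard, crawlReceipt and urls has to guard against null. A null-tolerant accessor would concentrate those checks in one place (a sketch; the method name delegatedURLs is hypothetical and assumes java.util.Collections and java.util.Map are imported):

    // sketch: empty-map view while the remote crawler is disabled
    public Map<String, DigestURL> delegatedURLs() {
        final Map<String, DigestURL> d = this.delegatedURL;
        return d == null ? Collections.<String, DigestURL>emptyMap() : d;
    }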
diff --git a/source/net/yacy/crawler/data/NoticedURL.java b/source/net/yacy/crawler/data/NoticedURL.java
index 1c9673acd..baeab151b 100644
--- a/source/net/yacy/crawler/data/NoticedURL.java
+++ b/source/net/yacy/crawler/data/NoticedURL.java
@@ -46,6 +46,7 @@ import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.robots.RobotsTxt;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.index.RowHandleSet;
+import net.yacy.kelondro.util.MemoryControl;
 
 public class NoticedURL {
 
@@ -55,8 +56,9 @@ public class NoticedURL {
 
     private Balancer coreStack;    // links found by crawling to depth-1
     private Balancer limitStack;   // links found by crawling at target depth
-    private Balancer remoteStack;  // links from remote crawl orders
+    private Balancer remoteStack;  // links from remote crawl orders (init on demand)
     private Balancer noloadStack;  // links that are not passed to a loader; the index will be generated from the Request entry
+    private final File cachePath;
 
     protected NoticedURL(
             final File cachePath,
@@ -64,16 +66,28 @@ public class NoticedURL {
             final boolean exceed134217727) {
         ConcurrentLog.info("NoticedURL", "START CREATING STACKS at " + cachePath.toString());
         ConcurrentLog.info("NoticedURL", "opening CrawlerCoreStacks..");
+        this.cachePath = cachePath;
         this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), onDemandLimit, exceed134217727);
         ConcurrentLog.info("NoticedURL", "opening CrawlerLimitStacks..");
         this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), onDemandLimit, exceed134217727);
-        ConcurrentLog.info("NoticedURL", "opening CrawlerRemoteStacks..");
-        this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), onDemandLimit, exceed134217727);
+
+        this.remoteStack = null; // init on demand (on first push)
+
         ConcurrentLog.info("NoticedURL", "opening CrawlerNoLoadStacks..");
         this.noloadStack = new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), onDemandLimit, exceed134217727);
         ConcurrentLog.info("NoticedURL", "FINISHED CREATING STACKS at " + cachePath.toString());
     }
 
+    /**
+     * Init the remote crawl stack; internally called on the first push to remoteStack
+     */
+    protected void initRemoteStack() {
+        if (this.remoteStack == null && !MemoryControl.shortStatus()) {
+            ConcurrentLog.info("NoticedURL", "opening CrawlerRemoteStacks..");
+            this.remoteStack = new HostBalancer(new File(this.cachePath, "CrawlerRemoteStacks"), this.coreStack.getOnDemandLimit(), this.coreStack.getExceed134217727());
+        }
+    }
+
     public void clear() {
         ConcurrentLog.info("NoticedURL", "CLEARING ALL STACKS");
         if (this.coreStack != null) this.coreStack.clear();
@@ -113,7 +127,6 @@ public class NoticedURL {
     }
 
     public int size() {
-        // this does not count the overhang stack size
         return ((this.coreStack == null) ? 0 : this.coreStack.size())
             + ((this.limitStack == null) ? 0 : this.limitStack.size())
             + ((this.remoteStack == null) ? 0 : this.remoteStack.size());
     }
@@ -127,7 +140,7 @@ public class NoticedURL {
 
     public boolean isEmpty() {
         if (!isEmptyLocal()) return false;
-        if (!this.remoteStack.isEmpty()) return false;
+        if (this.remoteStack != null && !this.remoteStack.isEmpty()) return false;
         return true;
     }
 
@@ -155,8 +168,7 @@ public class NoticedURL {
         return this.coreStack.has(urlhashb) ||
             this.limitStack.has(urlhashb) ||
-            //overhangStack.has(urlhashb) ||
-            this.remoteStack.has(urlhashb) ||
+            (this.remoteStack != null && this.remoteStack.has(urlhashb)) ||
             this.noloadStack.has(urlhashb);
     }
 
@@ -169,11 +181,16 @@ public class NoticedURL {
     public String push(final StackType stackType, final Request entry, CrawlProfile profile, final RobotsTxt robots) {
         try {
             switch (stackType) {
-                case LOCAL:  return this.coreStack.push(entry, profile, robots);
+                case LOCAL:  return this.coreStack.push(entry, profile, robots);
                 case GLOBAL: return this.limitStack.push(entry, profile, robots);
-                case REMOTE: return this.remoteStack.push(entry, profile, robots);
+                case REMOTE: {
+                    if (this.remoteStack == null) {
+                        this.initRemoteStack();
+                    }
+                    return (this.remoteStack != null) ? this.remoteStack.push(entry, profile, robots) : "remote crawler stack deactivated";
+                }
                 case NOLOAD: return this.noloadStack.push(entry, profile, robots);
-                default:     return "stack type unknown";
+                default:     return "stack type unknown";
             }
         } catch (final Exception er) {
             ConcurrentLog.logException(er);
@@ -186,7 +203,7 @@ public class NoticedURL {
         try {if ((entry = this.noloadStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
         try {if ((entry = this.coreStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
         try {if ((entry = this.limitStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
-        try {if ((entry = this.remoteStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
+        try {if (this.remoteStack != null && (entry = this.remoteStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
         return null;
     }
 
@@ -204,7 +221,7 @@ public class NoticedURL {
             try {ret |= this.noloadStack.remove(urlHashes) > 0;} catch (final IOException e) {}
             try {ret |= this.coreStack.remove(urlHashes) > 0;} catch (final IOException e) {}
             try {ret |= this.limitStack.remove(urlHashes) > 0;} catch (final IOException e) {}
-            try {ret |= this.remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
+            try {ret |= this.remoteStack != null && this.remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
             return ret;
         } catch (final SpaceExceededException e) {
             ConcurrentLog.logException(e);
@@ -217,7 +234,7 @@ public class NoticedURL {
         try {removed += this.noloadStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
         try {removed += this.coreStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
         try {removed += this.limitStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
-        try {removed += this.remoteStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
+        if (this.remoteStack != null) try {removed += this.remoteStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
         return removed;
     }
 
@@ -226,7 +243,7 @@ public class NoticedURL {
         removed += this.noloadStack.removeAllByHostHashes(hosthashes);
         removed += this.coreStack.removeAllByHostHashes(hosthashes);
         removed += this.limitStack.removeAllByHostHashes(hosthashes);
-        removed += this.remoteStack.removeAllByHostHashes(hosthashes);
+        if (this.remoteStack != null) removed += this.remoteStack.removeAllByHostHashes(hosthashes);
         return removed;
     }
 
@@ -238,7 +255,7 @@ public class NoticedURL {
         switch (stackType) {
             case LOCAL:  return this.coreStack.getDomainStackHosts(robots);
             case GLOBAL: return this.limitStack.getDomainStackHosts(robots);
-            case REMOTE: return this.remoteStack.getDomainStackHosts(robots);
+            case REMOTE: return (this.remoteStack != null) ? this.remoteStack.getDomainStackHosts(robots) : null;
             case NOLOAD: return this.noloadStack.getDomainStackHosts(robots);
             default: return null;
         }
@@ -254,7 +271,7 @@ public class NoticedURL {
         switch (stackType) {
             case LOCAL:  return this.coreStack.getDomainStackReferences(host, maxcount, maxtime);
             case GLOBAL: return this.limitStack.getDomainStackReferences(host, maxcount, maxtime);
-            case REMOTE: return this.remoteStack.getDomainStackReferences(host, maxcount, maxtime);
+            case REMOTE: return (this.remoteStack != null) ? this.remoteStack.getDomainStackReferences(host, maxcount, maxtime) : null;
             case NOLOAD: return this.noloadStack.getDomainStackReferences(host, maxcount, maxtime);
             default: return null;
         }
@@ -264,7 +281,7 @@ public class NoticedURL {
         switch (stackType) {
             case LOCAL:  return pop(this.coreStack, delay, cs, robots);
             case GLOBAL: return pop(this.limitStack, delay, cs, robots);
-            case REMOTE: return pop(this.remoteStack, delay, cs, robots);
+            case REMOTE: return (this.remoteStack != null) ? pop(this.remoteStack, delay, cs, robots) : null;
             case NOLOAD: return pop(this.noloadStack, false, cs, robots);
             default: return null;
         }
@@ -285,14 +302,25 @@ public class NoticedURL {
     }
 
     public void clear(final StackType stackType) {
-        ConcurrentLog.info("NoticedURL", "CLEARING STACK " + stackType);
+        ConcurrentLog.info("NoticedURL", "CLEARING STACK " + stackType);
         switch (stackType) {
-            case LOCAL: this.coreStack.clear(); break;
-            case GLOBAL: this.limitStack.clear(); break;
-            case REMOTE: this.remoteStack.clear(); break;
-            case NOLOAD: this.noloadStack.clear(); break;
-            default: return;
-        }
+            case LOCAL:
+                this.coreStack.clear();
+                break;
+            case GLOBAL:
+                this.limitStack.clear();
+                break;
+            case REMOTE:
+                if (this.remoteStack != null) {
+                    this.remoteStack.clear();
+                }
+                break;
+            case NOLOAD:
+                this.noloadStack.clear();
+                break;
+            default:
+                return;
+        }
     }
 
     private static Request pop(final Balancer balancer, final boolean delay, final CrawlSwitchboard cs, final RobotsTxt robots) throws IOException {
@@ -331,7 +359,7 @@ public class NoticedURL {
         try {switch (stackType) {
             case LOCAL:  return this.coreStack.iterator();
             case GLOBAL: return this.limitStack.iterator();
-            case REMOTE: return this.remoteStack.iterator();
+            case REMOTE: return (this.remoteStack != null) ? this.remoteStack.iterator() : null;
             case NOLOAD: return this.noloadStack.iterator();
             default: return null;
         }} catch (final IOException e) {
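Note: push(REMOTE, ...) can be reached from concurrent threads, and initRemoteStack() above tests and assigns this.remoteStack without a lock, so two racing pushes could each build a HostBalancer over the same CrawlerRemoteStacks directory. Declaring the initializer synchronized would close that window at negligible cost (same body as above, only the modifier added; a sketch):

    protected synchronized void initRemoteStack() {
        if (this.remoteStack == null && !MemoryControl.shortStatus()) {
            ConcurrentLog.info("NoticedURL", "opening CrawlerRemoteStacks..");
            this.remoteStack = new HostBalancer(new File(this.cachePath, "CrawlerRemoteStacks"),
                    this.coreStack.getOnDemandLimit(), this.coreStack.getExceed134217727());
        }
    }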
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 56308ebb2..ea4ee9685 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1051,32 +1051,9 @@ public final class Switchboard extends serverSwitch {
                 20000,
                 0),
             10000);
-        deployThread(
-            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL,
-            "Remote Crawl Job",
-            "thread that performes a single crawl/indexing step triggered by a remote peer",
-            "/IndexCreateQueues_p.html?stack=REMOTE",
-            new InstantBusyThread(
-                this.crawlQueues,
-                SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START,
-                SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT,
-                SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM,
-                0,
-                0),
-            10000);
-        deployThread(
-            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER,
-            "Remote Crawl URL Loader",
-            "thread that loads remote crawl lists from other peers",
-            null,
-            new InstantBusyThread(
-                this.crawlQueues,
-                SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START,
-                SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT,
-                SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_FREEMEM,
-                10000,
-                10000),
-            10000); // error here?
+
+        this.initRemoteCrawler(this.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
+
         deployThread(
             SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL,
             "Local Crawl",
@@ -1472,21 +1449,77 @@ public final class Switchboard extends serverSwitch {
         // propagate to crawler
         final BusyThread rct = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
         setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, newBusySleep);
-        setConfig(
-            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP,
-            Math.min(10000, newBusySleep * 10));
-        rct.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
-        rct
-            .setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));
+        setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP,
+                Math.min(10000, newBusySleep * 10));
+        if (rct != null) {
+            rct.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
+            rct.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));
+        }
 
         // propagate to loader
         final BusyThread rcl = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
         setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, newBusySleep * 4);
-        setConfig(
-            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP,
-            Math.min(10000, newBusySleep * 20));
-        rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
-        rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
+        setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP,
+                Math.min(10000, newBusySleep * 20));
+        if (rcl != null) {
+            rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
+            rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
+        }
+    }
+
+    /**
+     * Initialize and apply all settings needed to enable remote crawls
+     * (if the remote crawler is not in use, this saves the resources)
+     * @param activate true=enable, false=disable
+     */
+    public void initRemoteCrawler(final boolean activate) {
+
+        this.setConfig(SwitchboardConstants.CRAWLJOB_REMOTE, activate);
+        this.peers.mySeed().setFlagAcceptRemoteCrawl(activate);
+        if (activate) {
+            this.crawlQueues.initRemoteCrawlQueues();
+
+            BusyThread rct = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
+            if (rct == null) {
+                deployThread(
+                    SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL,
+                    "Remote Crawl Job",
+                    "thread that performs a single crawl/indexing step triggered by a remote peer",
+                    "/IndexCreateQueues_p.html?stack=REMOTE",
+                    new InstantBusyThread(
+                        this.crawlQueues,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM,
+                        0,
+                        0),
+                    10000);
+                rct = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
+            }
+            rct.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
+            rct.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));
+
+            BusyThread rcl = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
+            if (rcl == null) {
+                deployThread(
+                    SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER,
+                    "Remote Crawl URL Loader",
+                    "thread that loads remote crawl lists from other peers",
+                    null,
+                    new InstantBusyThread(
+                        this.crawlQueues,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT,
+                        SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_FREEMEM,
+                        10000,
+                        10000),
+                    10000);
+
+                rcl = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
+            }
+            rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
+            rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
+        }
     }
 
     public void initMessages() throws IOException {
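Note: initRemoteCrawler(false) persists the flag and clears the seed flag, but threads deployed by an earlier initRemoteCrawler(true) keep running until the next restart. If an immediate disable is wanted, an else branch along these lines would stop them (a sketch, assuming serverSwitch's terminateThread(name, waitFor) behaves here as it does elsewhere in the codebase):

    } else {
        // stop the two remote-crawl busy threads if they were deployed before;
        // false = do not block until the threads have terminated
        terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, false);
        terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER, false);
    }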
@@ -2160,7 +2193,7 @@ public final class Switchboard extends serverSwitch {

    public int cleanupJobSize() {
        int c = 1; // run this always!
-       if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
+       if (this.crawlQueues.delegatedURL != null && (this.crawlQueues.delegatedURL.size() > 1000) ) {
            c++;
        }
        if ( (this.crawlQueues.errorURL.stackSize() > 1000) ) {
@@ -2256,7 +2289,7 @@ public final class Switchboard extends serverSwitch {

        // clean up delegated stack
        checkInterruption();
-       if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
+       if (this.crawlQueues.delegatedURL != null && (this.crawlQueues.delegatedURL.size() > 1000) ) {
            if ( this.log.isFine() ) {
                this.log.fine("Cleaning Delegated-URLs report stack, "
                    + this.crawlQueues.delegatedURL.size()
@@ -3778,7 +3811,7 @@ public final class Switchboard extends serverSwitch {
            mySeed.setFlagDirectConnect(true);
            mySeed.setLastSeenUTC();
            mySeed.put(Seed.UTC, GenericFormatter.UTCDiffString());
-           mySeed.setFlagAcceptRemoteCrawl(getConfigBool("crawlResponse", true));
+           mySeed.setFlagAcceptRemoteCrawl(getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
            mySeed.setFlagAcceptRemoteIndex(getConfigBool("allowReceiveIndex", true));
            mySeed.setFlagSSLAvailable(this.getHttpServer() != null && this.getHttpServer().withSSL() && getConfigBool("server.https", false));
            if (mySeed.getFlagSSLAvailable()) mySeed.put(Seed.PORTSSL, Integer.toString(getPublicPort("port.ssl", 8443)));
diff --git a/source/net/yacy/search/SwitchboardConstants.java b/source/net/yacy/search/SwitchboardConstants.java
index 928f1887c..82295624f 100644
--- a/source/net/yacy/search/SwitchboardConstants.java
+++ b/source/net/yacy/search/SwitchboardConstants.java
@@ -113,6 +113,7 @@ public final class SwitchboardConstants {
     *
     * @see Switchboard#CRAWLJOB_REMOTE_CRAWL_LOADER
     */
+    public static final String CRAWLJOB_REMOTE = "crawlResponse"; // enable/disable response to remote crawl requests
    public static final String CRAWLJOB_REMOTE_CRAWL_LOADER = "60_remotecrawlloader";
    public static final String CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START = "remoteCrawlLoaderJob";
    public static final String CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT = null;
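With the constant in place, callers toggle the remote crawler through the new Switchboard entry point instead of writing the raw "crawlResponse" key; the RemoteCrawl_p.java hunk above reduces to this pattern:

    final boolean on = post.get("crawlResponse", "off").equals("on");
    sb.initRemoteCrawler(on); // deploys the two busy threads on first activation, reuses them afterwards
    final boolean accepting = sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false);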