Init remote crawler on demand

If the remote crawl option is not activated, skip the initialization of the remoteCrawlJob to save the resources of its queue and idling thread.
Deployment of the remoteCrawlJob is deferred until the option is activated.
pull/8/head
reger 10 years ago
parent dbf9e3503d
commit 3e742d1e34

@ -143,7 +143,7 @@ public class ConfigNetwork_p
prop.put("commit", commit);
// write remote crawl request settings
prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? "1" : "0");
prop.put("crawlResponse", sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false) ? "1" : "0");
final long RTCbusySleep =
Math
.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
@ -166,7 +166,7 @@ public class ConfigNetwork_p
prop.put("indexReceiveSearchChecked", indexReceiveSearch);
// set seed information directly
sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool("crawlResponse", false));
sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
sb.peers.mySeed().setFlagAcceptRemoteIndex(indexReceive);
// set p2p/robinson mode flags and values

@ -56,7 +56,7 @@ public class RemoteCrawl_p {
boolean crawlResponse = post.get("crawlResponse", "off").equals("on");
// read remote crawl request settings
sb.setConfig("crawlResponse", crawlResponse);
sb.initRemoteCrawler(crawlResponse);
}
if (post.containsKey("acceptCrawlLimit")) {
@ -70,7 +70,7 @@ public class RemoteCrawl_p {
}
// set seed information directly
sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool("crawlResponse", false));
sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
// write remote crawl request settings
prop.put("disabled", !sb.peers.mySeed().isActive() && !sb.peers.mySeed().getFlagAcceptRemoteCrawl() ? 1 : 0);

@ -356,7 +356,7 @@ public class Status
prop.putNum(
"remoteTriggeredCrawlQueueSize",
sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) != null ? sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount() : 0);
prop.put(
"remoteTriggeredCrawlPaused",
sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");

@ -105,7 +105,7 @@ public class status_p {
prop.put("limitCrawlState", STATE_RUNNING);
//remote crawl queue
prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
prop.putNum("remoteCrawlSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) != null ? sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount() : 0);
prop.put("remoteCrawlState", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED : STATE_RUNNING);
//noload crawl queue

@ -143,7 +143,7 @@ public final class crawlReceipt {
return prop;
}
if ("fill".equals(result)) try {
if ("fill".equals(result) && sb.crawlQueues.delegatedURL != null) try {
// put new entry into database
sb.index.fulltext().putMetadata(entry);
ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
@ -159,8 +159,10 @@ public final class crawlReceipt {
return prop;
}
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case
sb.crawlQueues.delegatedURL.remove(entry.hash());
sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
}
//switchboard.noticeURL.remove(receivedUrlhash);
prop.put("delay", "3600");
return prop;

@ -83,7 +83,7 @@ public class urls {
}
// place url to notice-url db
sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());
if (sb.crawlQueues.delegatedURL != null) sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());
// create RSS entry
prop.put("item_" + c + "_title", "");

@ -21,6 +21,7 @@
package net.yacy.crawler;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
@ -90,6 +91,9 @@ public interface Balancer {
*/
public int size();
/**
 * Returns the on-demand limit of this balancer.
 * NOTE(review): the exact meaning of the limit is not visible here; HostBalancer
 * returns its {@code onDemandLimit} field — confirm semantics against the constructor.
 *
 * @return the on-demand limit
 * @throws UnsupportedOperationException if the implementation does not provide this value
 *         (HostQueue and LegacyBalancer currently throw)
 */
public int getOnDemandLimit();
/**
 * Returns the {@code exceed134217727} flag of this balancer.
 * NOTE(review): presumably tells whether the backing storage may grow beyond
 * 134217727 (2^27 - 1) — TODO confirm against the storage layer.
 *
 * @return the value of the exceed134217727 flag
 * @throws UnsupportedOperationException if the implementation does not provide this value
 *         (LegacyBalancer currently throws)
 */
public boolean getExceed134217727();
/**
* check if stack is empty
* @return true iff size() == 0

@ -203,6 +203,15 @@ public class HostBalancer implements Balancer {
return true;
}
/**
 * Returns the value of this balancer's {@code onDemandLimit} field.
 *
 * @return the on-demand limit held by this HostBalancer
 */
@Override
public int getOnDemandLimit() {
return this.onDemandLimit;
}
/**
 * Returns the value of this balancer's {@code exceed134217727} field.
 *
 * @return the exceed134217727 flag held by this HostBalancer
 */
@Override
public boolean getExceed134217727() {
return this.exceed134217727;
}
/**
* push a request to one of the host queues. If the queue does not exist, it is created
* @param entry

@ -544,4 +544,14 @@ public class HostQueue implements Balancer {
return cel;
}
/**
 * Not implemented for HostQueue: always throws.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int getOnDemandLimit() {
throw new UnsupportedOperationException("Not supported yet."); // no on-demand limit is exposed by a HostQueue
}
/**
 * Returns the value of this queue's {@code exceed134217727} field.
 *
 * @return the exceed134217727 flag held by this HostQueue
 */
@Override
public boolean getExceed134217727() {
return this.exceed134217727;
}
}

@ -76,6 +76,16 @@ public class LegacyBalancer implements Balancer {
private final List<Map.Entry<String, byte[]>> zeroWaitingCandidates;
private final Random random; // used to alternate between choose-from-maxstack or choose from any zero-waiting
/**
 * Not implemented for LegacyBalancer: always throws.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int getOnDemandLimit() {
throw new UnsupportedOperationException("Not supported yet."); // no on-demand limit is exposed by a LegacyBalancer
}
/**
 * Not implemented for LegacyBalancer: always throws.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public boolean getExceed134217727() {
throw new UnsupportedOperationException("Not supported yet."); // the flag is not exposed by a LegacyBalancer
}
private static class HostHandles {
public String hosthash;
public HandleSet handleSet;

@ -72,7 +72,7 @@ public class CrawlQueues {
private final Switchboard sb;
private final Loader[] worker;
private final ArrayBlockingQueue<Request> workerQueue;
private final ArrayList<String> remoteCrawlProviderHashes;
private ArrayList<String> remoteCrawlProviderHashes;
public NoticedURL noticeURL;
public ErrorCache errorURL;
@ -83,7 +83,7 @@ public class CrawlQueues {
final int maxWorkers = (int) sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 10);
this.worker = new Loader[maxWorkers];
this.workerQueue = new ArrayBlockingQueue<Request>(200);
this.remoteCrawlProviderHashes = new ArrayList<String>();
this.remoteCrawlProviderHashes = null;
// start crawling management
log.config("Starting Crawling Management");
@ -92,10 +92,16 @@ public class CrawlQueues {
log.config("Opening errorURL..");
this.errorURL = new ErrorCache(sb.index.fulltext());
log.config("Opening delegatedURL..");
this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
log.config("Finishted Startup of Crawling Management");
this.delegatedURL = null;
}
/**
 * Allocates the data structures that are only needed for remote crawling:
 * the list of remote crawl provider hashes and the map of delegated URLs.
 * Structures that already exist are left untouched, so the method may be
 * called repeatedly.
 */
public void initRemoteCrawlQueues() {
    if (this.remoteCrawlProviderHashes == null) {
        this.remoteCrawlProviderHashes = new ArrayList<String>();
    }
    if (this.delegatedURL == null) {
        this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
        // log what actually happened here: the remote crawl queues were created on demand
        log.config("Initialized remote crawl queues");
    }
}
/**
* Relocation is necessary if the user switches the network.
* Because this object is part of the scheduler we cannot simply close that object and create a new one.
@ -106,10 +112,10 @@ public class CrawlQueues {
// removed pending requests
this.workerQueue.clear();
this.errorURL.clearCache();
this.remoteCrawlProviderHashes.clear();
if (this.remoteCrawlProviderHashes != null) this.remoteCrawlProviderHashes.clear();
this.noticeURL.close();
this.noticeURL = new NoticedURL(newQueuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), this.sb.exceed134217727);
this.delegatedURL.clear();
if (this.delegatedURL != null) this.delegatedURL.clear();
}
public synchronized void close() {
@ -130,16 +136,16 @@ public class CrawlQueues {
}
}
this.noticeURL.close();
this.delegatedURL.clear();
if (this.delegatedURL != null) this.delegatedURL.clear();
}
/**
 * Discards all pending crawl work held by this object: empties the worker queue,
 * interrupts every existing loader, and clears the noticed-URL stacks.
 * The remote crawl structures are cleared only if they have been initialized;
 * they are null while remote crawling has not been set up.
 */
public void clear() {
    // drop queued requests and interrupt running loaders (does not wait for them to finish)
    this.workerQueue.clear();
    for (final Loader w : this.worker) {
        if (w != null) {
            w.interrupt();
        }
    }
    if (this.remoteCrawlProviderHashes != null) {
        this.remoteCrawlProviderHashes.clear();
    }
    this.noticeURL.clear();
    if (this.delegatedURL != null) {
        this.delegatedURL.clear();
    }
}
/**
@ -148,7 +154,7 @@ public class CrawlQueues {
* @return if the hash exists, the name of the database is returned, otherwise null is returned
*/
public HarvestProcess exists(final byte[] hash) {
if (this.delegatedURL.containsKey(ASCII.String(hash))) {
if (this.delegatedURL != null && this.delegatedURL.containsKey(ASCII.String(hash))) {
return HarvestProcess.DELEGATED;
}
//if (this.noticeURL.existsInStack(hash)) {
@ -181,7 +187,7 @@ public class CrawlQueues {
/**
 * Removes a URL from the noticed-URL stacks and, if the remote crawl queues
 * have been initialized, from the delegated-URL map.
 *
 * @param hash the 12-byte url hash; must not be null
 */
public void removeURL(final byte[] hash) {
    assert hash != null && hash.length == 12;
    this.noticeURL.removeByURLHash(hash);
    if (this.delegatedURL != null) {
        // delegatedURL is keyed by the String form of the hash; removing with the
        // raw byte[] would never match an entry
        this.delegatedURL.remove(ASCII.String(hash));
    }
}
public int removeHosts(final Set<String> hosthashes) {
@ -194,9 +200,11 @@ public class CrawlQueues {
if (urlhash == null || urlhash.length == 0) {
return null;
}
DigestURL u = this.delegatedURL.get(ASCII.String(urlhash));
if (u != null) {
return u;
if (this.delegatedURL != null) {
DigestURL u = this.delegatedURL.get(ASCII.String(urlhash));
if (u != null) {
return u;
}
}
for (final DigestURL url: activeWorkerEntries().keySet()) {
if (Base64Order.enhancedCoder.equal(url.hash(), urlhash)) {
@ -456,7 +464,7 @@ public class CrawlQueues {
// check if we have an entry in the provider list, otherwise fill the list
Seed seed;
if (this.remoteCrawlProviderHashes.isEmpty()) {
if (this.remoteCrawlProviderHashes != null && this.remoteCrawlProviderHashes.isEmpty()) {
if (this.sb.peers != null && this.sb.peers.sizeConnected() > 0) {
final Iterator<Seed> e = DHTSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
while (e.hasNext()) {
@ -467,14 +475,14 @@ public class CrawlQueues {
}
}
}
if (this.remoteCrawlProviderHashes.isEmpty()) {
if (this.remoteCrawlProviderHashes == null || this.remoteCrawlProviderHashes.isEmpty()) {
return false;
}
// take one entry from the provider list and load the entries from the remote peer
seed = null;
String hash = null;
while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
while (seed == null && (this.remoteCrawlProviderHashes != null && !this.remoteCrawlProviderHashes.isEmpty())) {
hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
if (hash == null) {
continue;

@ -46,6 +46,7 @@ import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.MemoryControl;
public class NoticedURL {
@ -55,8 +56,9 @@ public class NoticedURL {
private Balancer coreStack; // links found by crawling to depth-1
private Balancer limitStack; // links found by crawling at target depth
private Balancer remoteStack; // links from remote crawl orders
private Balancer remoteStack; // links from remote crawl orders (init on demand)
private Balancer noloadStack; // links that are not passed to a loader; the index will be generated from the Request entry
private final File cachePath;
protected NoticedURL(
final File cachePath,
@ -64,16 +66,28 @@ public class NoticedURL {
final boolean exceed134217727) {
ConcurrentLog.info("NoticedURL", "START CREATING STACKS at " + cachePath.toString());
ConcurrentLog.info("NoticedURL", "opening CrawlerCoreStacks..");
this.cachePath = cachePath;
this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), onDemandLimit, exceed134217727);
ConcurrentLog.info("NoticedURL", "opening CrawlerLimitStacks..");
this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), onDemandLimit, exceed134217727);
ConcurrentLog.info("NoticedURL", "opening CrawlerRemoteStacks..");
this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), onDemandLimit, exceed134217727);
this.remoteStack = null; // init on demand (on first push)
ConcurrentLog.info("NoticedURL", "opening CrawlerNoLoadStacks..");
this.noloadStack = new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), onDemandLimit, exceed134217727);
ConcurrentLog.info("NoticedURL", "FINISHED CREATING STACKS at " + cachePath.toString());
}
/**
 * Opens the remote crawl stack if it has not been opened yet.
 * Nothing is done when the stack already exists or when
 * {@code MemoryControl.shortStatus()} reports true. The new stack takes its
 * on-demand limit and exceed134217727 flag from the core stack.
 * Called internally on the first push to the remote stack.
 */
protected void initRemoteStack() {
    if (this.remoteStack != null) {
        return; // already opened
    }
    if (MemoryControl.shortStatus()) {
        return; // do not open another stack right now
    }
    ConcurrentLog.info("NoticedURL", "opening CrawlerRemoteStacks..");
    final File remoteStackPath = new File(this.cachePath, "CrawlerRemoteStacks");
    final int limit = this.coreStack.getOnDemandLimit();
    final boolean exceed = this.coreStack.getExceed134217727();
    this.remoteStack = new HostBalancer(remoteStackPath, limit, exceed);
}
public void clear() {
ConcurrentLog.info("NoticedURL", "CLEARING ALL STACKS");
if (this.coreStack != null) this.coreStack.clear();
@ -113,7 +127,6 @@ public class NoticedURL {
}
/**
 * Sums the sizes of the core, limit and remote stacks; a stack that is null
 * contributes 0. The noload stack is not part of the sum, and the overhang
 * stack size is not counted either.
 *
 * @return the combined number of entries in the core, limit and remote stacks
 */
public int size() {
    int total = 0;
    if (this.coreStack != null) {
        total += this.coreStack.size();
    }
    if (this.limitStack != null) {
        total += this.limitStack.size();
    }
    if (this.remoteStack != null) {
        total += this.remoteStack.size();
    }
    return total;
}
@ -127,7 +140,7 @@ public class NoticedURL {
/**
 * Checks whether neither the local stacks nor the remote stack hold any entry.
 *
 * @return true if {@code isEmptyLocal()} is true and the remote stack is
 *         either not initialized or empty
 */
public boolean isEmpty() {
    if (!isEmptyLocal()) {
        return false;
    }
    // the remote stack is created on demand and may still be null
    return this.remoteStack == null || this.remoteStack.isEmpty();
}
@ -155,8 +168,7 @@ public class NoticedURL {
return
this.coreStack.has(urlhashb) ||
this.limitStack.has(urlhashb) ||
//overhangStack.has(urlhashb) ||
this.remoteStack.has(urlhashb) ||
(this.remoteStack != null && this.remoteStack.has(urlhashb)) ||
this.noloadStack.has(urlhashb);
}
@ -169,11 +181,16 @@ public class NoticedURL {
public String push(final StackType stackType, final Request entry, CrawlProfile profile, final RobotsTxt robots) {
try {
switch (stackType) {
case LOCAL: return this.coreStack.push(entry, profile, robots);
case LOCAL: return this.coreStack.push(entry, profile, robots);
case GLOBAL: return this.limitStack.push(entry, profile, robots);
case REMOTE: return this.remoteStack.push(entry, profile, robots);
case REMOTE: {
if (this.remoteStack == null) {
this.initRemoteStack();
}
return (this.remoteStack != null) ? this.remoteStack.push(entry, profile, robots) : "remote crawler stack deactivated";
}
case NOLOAD: return this.noloadStack.push(entry, profile, robots);
default: return "stack type unknown";
default: return "stack type unknown";
}
} catch (final Exception er) {
ConcurrentLog.logException(er);
@ -186,7 +203,7 @@ public class NoticedURL {
try {if ((entry = this.noloadStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
try {if ((entry = this.coreStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
try {if ((entry = this.limitStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
try {if ((entry = this.remoteStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
try {if (this.remoteStack != null && (entry = this.remoteStack.get(urlhash)) != null) return entry;} catch (final IOException e) {}
return null;
}
@ -204,7 +221,7 @@ public class NoticedURL {
try {ret |= this.noloadStack.remove(urlHashes) > 0;} catch (final IOException e) {}
try {ret |= this.coreStack.remove(urlHashes) > 0;} catch (final IOException e) {}
try {ret |= this.limitStack.remove(urlHashes) > 0;} catch (final IOException e) {}
try {ret |= this.remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
try {ret |= this.remoteStack != null && this.remoteStack.remove(urlHashes) > 0;} catch (final IOException e) {}
return ret;
} catch (final SpaceExceededException e) {
ConcurrentLog.logException(e);
@ -217,7 +234,7 @@ public class NoticedURL {
try {removed += this.noloadStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
try {removed += this.coreStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
try {removed += this.limitStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
try {removed += this.remoteStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
if (this.remoteStack != null) try {removed += this.remoteStack.removeAllByProfileHandle(handle, timeout);} catch (final IOException e) {}
return removed;
}
@ -226,7 +243,7 @@ public class NoticedURL {
removed += this.noloadStack.removeAllByHostHashes(hosthashes);
removed += this.coreStack.removeAllByHostHashes(hosthashes);
removed += this.limitStack.removeAllByHostHashes(hosthashes);
removed += this.remoteStack.removeAllByHostHashes(hosthashes);
if (this.remoteStack != null) removed += this.remoteStack.removeAllByHostHashes(hosthashes);
return removed;
}
@ -238,7 +255,7 @@ public class NoticedURL {
switch (stackType) {
case LOCAL: return this.coreStack.getDomainStackHosts(robots);
case GLOBAL: return this.limitStack.getDomainStackHosts(robots);
case REMOTE: return this.remoteStack.getDomainStackHosts(robots);
case REMOTE: return (this.remoteStack != null) ? this.remoteStack.getDomainStackHosts(robots) : null;
case NOLOAD: return this.noloadStack.getDomainStackHosts(robots);
default: return null;
}
@ -254,7 +271,7 @@ public class NoticedURL {
switch (stackType) {
case LOCAL: return this.coreStack.getDomainStackReferences(host, maxcount, maxtime);
case GLOBAL: return this.limitStack.getDomainStackReferences(host, maxcount, maxtime);
case REMOTE: return this.remoteStack.getDomainStackReferences(host, maxcount, maxtime);
case REMOTE: return (this.remoteStack != null) ? this.remoteStack.getDomainStackReferences(host, maxcount, maxtime) : null;
case NOLOAD: return this.noloadStack.getDomainStackReferences(host, maxcount, maxtime);
default: return null;
}
@ -264,7 +281,7 @@ public class NoticedURL {
switch (stackType) {
case LOCAL: return pop(this.coreStack, delay, cs, robots);
case GLOBAL: return pop(this.limitStack, delay, cs, robots);
case REMOTE: return pop(this.remoteStack, delay, cs, robots);
case REMOTE: return (this.remoteStack != null) ? pop(this.remoteStack, delay, cs, robots) : null;
case NOLOAD: return pop(this.noloadStack, false, cs, robots);
default: return null;
}
@ -285,14 +302,25 @@ public class NoticedURL {
}
public void clear(final StackType stackType) {
ConcurrentLog.info("NoticedURL", "CLEARING STACK " + stackType);
ConcurrentLog.info("NoticedURL", "CLEARING STACK " + stackType);
switch (stackType) {
case LOCAL: this.coreStack.clear(); break;
case GLOBAL: this.limitStack.clear(); break;
case REMOTE: this.remoteStack.clear(); break;
case NOLOAD: this.noloadStack.clear(); break;
default: return;
}
case LOCAL:
this.coreStack.clear();
break;
case GLOBAL:
this.limitStack.clear();
break;
case REMOTE:
if (this.remoteStack != null) {
this.remoteStack.clear();
}
break;
case NOLOAD:
this.noloadStack.clear();
break;
default:
return;
}
}
private static Request pop(final Balancer balancer, final boolean delay, final CrawlSwitchboard cs, final RobotsTxt robots) throws IOException {
@ -331,7 +359,7 @@ public class NoticedURL {
try {switch (stackType) {
case LOCAL: return this.coreStack.iterator();
case GLOBAL: return this.limitStack.iterator();
case REMOTE: return this.remoteStack.iterator();
case REMOTE: return (this.remoteStack != null) ? this.remoteStack.iterator() : null;
case NOLOAD: return this.noloadStack.iterator();
default: return null;
}} catch (final IOException e) {

@ -1051,32 +1051,9 @@ public final class Switchboard extends serverSwitch {
20000,
0),
10000);
deployThread(
SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL,
"Remote Crawl Job",
"thread that performes a single crawl/indexing step triggered by a remote peer",
"/IndexCreateQueues_p.html?stack=REMOTE",
new InstantBusyThread(
this.crawlQueues,
SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START,
SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT,
SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM,
0,
0),
10000);
deployThread(
SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER,
"Remote Crawl URL Loader",
"thread that loads remote crawl lists from other peers",
null,
new InstantBusyThread(
this.crawlQueues,
SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START,
SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT,
SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_FREEMEM,
10000,
10000),
10000); // error here?
this.initRemoteCrawler(this.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
deployThread(
SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL,
"Local Crawl",
@ -1472,21 +1449,77 @@ public final class Switchboard extends serverSwitch {
// propagate to crawler
final BusyThread rct = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, newBusySleep);
setConfig(
SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP,
Math.min(10000, newBusySleep * 10));
rct.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
rct
.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));
setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP,
Math.min(10000, newBusySleep * 10));
if (rct != null) {
rct.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
rct.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));
}
// propagate to loader
final BusyThread rcl = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, newBusySleep * 4);
setConfig(
SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP,
Math.min(10000, newBusySleep * 20));
rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
setConfig(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP,
Math.min(10000, newBusySleep * 20));
if (rcl != null) {
rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
}
}
/**
 * Initializes and applies all settings needed to enable remote crawls.
 * <p>
 * The given value is always stored under {@code SwitchboardConstants.CRAWLJOB_REMOTE}
 * and copied to the accept-remote-crawl flag of the own seed. Only when
 * {@code activate} is true are the remote crawl queues created and the two
 * remote crawl threads deployed (unless they are already registered), so a peer
 * that does not use remote crawling saves those resources. Passing false does
 * not stop or remove threads and queues that were set up earlier.
 *
 * @param activate true=enable, false=disable
 */
public void initRemoteCrawler(final boolean activate) {
    this.setConfig(SwitchboardConstants.CRAWLJOB_REMOTE, activate);
    this.peers.mySeed().setFlagAcceptRemoteCrawl(activate);
    if (!activate) {
        return; // nothing to set up when remote crawling is switched off
    }

    this.crawlQueues.initRemoteCrawlQueues();

    // job that works off crawl requests triggered by remote peers
    BusyThread remoteCrawlJob = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
    if (remoteCrawlJob == null) {
        final InstantBusyThread crawlJobThread = new InstantBusyThread(
            this.crawlQueues,
            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_START,
            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_JOBCOUNT,
            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_METHOD_FREEMEM,
            0,
            0);
        deployThread(
            SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL,
            "Remote Crawl Job",
            "thread that performes a single crawl/indexing step triggered by a remote peer",
            "/IndexCreateQueues_p.html?stack=REMOTE",
            crawlJobThread,
            10000);
        remoteCrawlJob = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
    }
    // apply the configured sleep times, whether the thread is new or was already deployed
    remoteCrawlJob.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 1000));
    remoteCrawlJob.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_IDLESLEEP, 10000));

    // job that loads remote crawl lists from other peers
    BusyThread remoteCrawlLoader = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
    if (remoteCrawlLoader == null) {
        final InstantBusyThread loaderThread = new InstantBusyThread(
            this.crawlQueues,
            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START,
            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT,
            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_FREEMEM,
            10000,
            10000);
        deployThread(
            SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER,
            "Remote Crawl URL Loader",
            "thread that loads remote crawl lists from other peers",
            null,
            loaderThread,
            10000);
        remoteCrawlLoader = getThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER);
    }
    remoteCrawlLoader.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
    remoteCrawlLoader.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
}
public void initMessages() throws IOException {
@ -2160,7 +2193,7 @@ public final class Switchboard extends serverSwitch {
public int cleanupJobSize() {
int c = 1; // run this always!
if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
if (this.crawlQueues.delegatedURL != null && (this.crawlQueues.delegatedURL.size() > 1000) ) {
c++;
}
if ( (this.crawlQueues.errorURL.stackSize() > 1000) ) {
@ -2256,7 +2289,7 @@ public final class Switchboard extends serverSwitch {
// clean up delegated stack
checkInterruption();
if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
if (this.crawlQueues.delegatedURL != null && (this.crawlQueues.delegatedURL.size() > 1000) ) {
if ( this.log.isFine() ) {
this.log.fine("Cleaning Delegated-URLs report stack, "
+ this.crawlQueues.delegatedURL.size()
@ -3778,7 +3811,7 @@ public final class Switchboard extends serverSwitch {
mySeed.setFlagDirectConnect(true);
mySeed.setLastSeenUTC();
mySeed.put(Seed.UTC, GenericFormatter.UTCDiffString());
mySeed.setFlagAcceptRemoteCrawl(getConfigBool("crawlResponse", true));
mySeed.setFlagAcceptRemoteCrawl(getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false));
mySeed.setFlagAcceptRemoteIndex(getConfigBool("allowReceiveIndex", true));
mySeed.setFlagSSLAvailable(this.getHttpServer() != null && this.getHttpServer().withSSL() && getConfigBool("server.https", false));
if (mySeed.getFlagSSLAvailable()) mySeed.put(Seed.PORTSSL, Integer.toString(getPublicPort("port.ssl", 8443)));

@ -113,6 +113,7 @@ public final class SwitchboardConstants {
*
* @see Switchboard#CRAWLJOB_REMOTE_CRAWL_LOADER
*/
public static final String CRAWLJOB_REMOTE = "crawlResponse"; // enable/disable response to remote crawl requests
public static final String CRAWLJOB_REMOTE_CRAWL_LOADER = "60_remotecrawlloader";
public static final String CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_START = "remoteCrawlLoaderJob";
public static final String CRAWLJOB_REMOTE_CRAWL_LOADER_METHOD_JOBCOUNT = null;

Loading…
Cancel
Save