enhanced parallelization of local/global/remote crawling

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@197 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent a05d738ea4
commit 3d8a2ff937

@ -169,12 +169,17 @@ public class IndexCreate_p {
if (post.containsKey("clearcrawlqueue")) {
String urlHash;
int c = 0;
while (switchboard.noticeURL.localStackSize() > 0) {
urlHash = switchboard.noticeURL.localPop().hash();
if (urlHash != null) {
switchboard.noticeURL.remove(urlHash);
c++;
while (switchboard.noticeURL.coreStackSize() > 0) {
urlHash = switchboard.noticeURL.corePop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
while (switchboard.noticeURL.limitStackSize() > 0) {
urlHash = switchboard.noticeURL.limitPop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
while (switchboard.noticeURL.remoteStackSize() > 0) {
urlHash = switchboard.noticeURL.remotePop().hash();
if (urlHash != null) { switchboard.noticeURL.remove(urlHash); c++; }
}
prop.put("info", 3);//crawling queue cleared
prop.put("info_numEntries", c);
@ -375,12 +380,12 @@ public class IndexCreate_p {
prop.put("loader-set_list", i );
}
int localStackSize = switchboard.noticeURL.localStackSize();
int localStackSize = switchboard.noticeURL.coreStackSize();
if (localStackSize == 0) {
prop.put("crawler-queue", 0);
} else {
prop.put("crawler-queue", 1);
plasmaCrawlNURL.entry[] crawlerList = switchboard.noticeURL.localTop(20);
plasmaCrawlNURL.entry[] crawlerList = switchboard.noticeURL.coreTop(20);
prop.put("crawler-queue_num", localStackSize);//num Entries
prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent
plasmaCrawlNURL.entry urle;

@ -61,8 +61,13 @@ import de.anomic.tools.bitfield;
public class plasmaCrawlNURL extends plasmaURL {
// Stack selectors handed over as the stack mode when a new entry is created.
// The numeric values correspond to the stackMode literals (1..4) that are
// tested where the url hash is pushed onto coreStack / limitStack /
// overhangStack / remoteStack.
public static final int STACK_TYPE_NULL = 0; // do not put the entry on any stack
public static final int STACK_TYPE_CORE = 1; // put on core stack (local crawl; links found by crawling to depth-1)
public static final int STACK_TYPE_LIMIT = 2; // put on limit stack (links found at target depth; candidates for a global/remote crawl)
public static final int STACK_TYPE_OVERHANG = 3; // put on overhang stack; links that are known but not crawled
public static final int STACK_TYPE_REMOTE = 4; // put on remote stack (links from remote crawl orders)
private kelondroStack localStack; // links found by crawling to depth-1
private kelondroStack coreStack; // links found by crawling to depth-1
private kelondroStack limitStack; // links found by crawling at target depth
private kelondroStack overhangStack; // links found by crawling at depth+1
private kelondroStack remoteStack; // links from remote crawl orders
@ -101,9 +106,21 @@ public class plasmaCrawlNURL extends plasmaURL {
File localCrawlStack = new File(cacheStacksPath, "urlNoticeLocal0.stack");
if (localCrawlStack.exists()) {
localStack = new kelondroStack(localCrawlStack, 0);
coreStack = new kelondroStack(localCrawlStack, 0);
} else {
localStack = new kelondroStack(localCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
coreStack = new kelondroStack(localCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File limitCrawlStack = new File(cacheStacksPath, "urlNoticeLimit0.stack");
if (limitCrawlStack.exists()) {
limitStack = new kelondroStack(limitCrawlStack, 0);
} else {
limitStack = new kelondroStack(limitCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File overhangCrawlStack = new File(cacheStacksPath, "urlNoticeOverhang0.stack");
if (overhangCrawlStack.exists()) {
overhangStack = new kelondroStack(overhangCrawlStack, 0);
} else {
overhangStack = new kelondroStack(overhangCrawlStack, 0, new int[] {plasmaURL.urlHashLength});
}
File globalCrawlStack = new File(cacheStacksPath, "urlNoticeRemote0.stack");
if (globalCrawlStack.exists()) {
@ -114,7 +131,7 @@ public class plasmaCrawlNURL extends plasmaURL {
// init stack Index
stackIndex = new HashSet();
Iterator i = localStack.iterator();
Iterator i = coreStack.iterator();
while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
i = remoteStack.iterator();
while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
@ -134,11 +151,22 @@ public class plasmaCrawlNURL extends plasmaURL {
}
public int stackSize() {
return localStack.size() + remoteStack.size();
// this does not count the overhang stack size
return coreStack.size() + limitStack.size() + remoteStack.size();
}
// Returns the current size of the core stack (coreStack.size()).
public int coreStackSize() {
return coreStack.size();
}
// Returns the current size of the limit stack (limitStack.size()).
public int limitStackSize() {
return limitStack.size();
}
public int localStackSize() {
return localStack.size();
public int overhangStackSize() {
return overhangStack.size();
}
// Returns the current size of the remote stack (remoteStack.size()).
public int remoteStackSize() {
return remoteStack.size();
}
@ -159,21 +187,24 @@ public class plasmaCrawlNURL extends plasmaURL {
// 3 = on overhang stack
// 4 = on remote stack
try {
if (stackMode == 1) {
localStack.push(new byte[][] {e.hash.getBytes()});
stackIndex.add(new String(e.hash.getBytes()));
}
if (stackMode == 4) {
remoteStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 1) coreStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 2) limitStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 3) overhangStack.push(new byte[][] {e.hash.getBytes()});
if (stackMode == 4) remoteStack.push(new byte[][] {e.hash.getBytes()});
stackIndex.add(new String(e.hash.getBytes()));
}
} catch (IOException er) {
}
return e;
}
public entry localPop() { return pop(localStack); }
public entry[] localTop(int count) { return top(localStack, count); }
// Per-stack accessors. Every xPop()/xTop(count) is a thin delegate to the
// shared pop(stack) / top(stack, count) helpers, bound to one of the four stacks.
// NOTE(review): pop presumably removes and returns the next entry, and
// top(count) presumably returns up to count entries without removing them -
// confirm against the pop()/top() implementations.
public entry corePop() { return pop(coreStack); }
public entry[] coreTop(int count) { return top(coreStack, count); }
public entry limitPop() { return pop(limitStack); }
public entry[] limitTop(int count) { return top(limitStack, count); }
public entry overhangPop() { return pop(overhangStack); }
public entry[] overhangTop(int count) { return top(overhangStack, count); }
public entry remotePop() { return pop(remoteStack); }
public entry[] remoteTop(int count) { return top(remoteStack, count); }
@ -344,6 +375,7 @@ public class plasmaCrawlNURL extends plasmaURL {
}
}
/*
public class kenum implements Enumeration {
// enumerates entry elements
kelondroTree.rowIterator i;
@ -362,5 +394,5 @@ public class plasmaCrawlNURL extends plasmaURL {
// enumerates entry elements
return new kenum(up, rotating);
}
*/
}

@ -323,10 +323,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
new serverInstantThread(this, "deQueue", "queueSize"), 10000);
deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack",
new serverInstantThread(cacheManager, "job", "size"), 10000);
deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling",
new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), 30000);
deployThread("62_remotetriggeredcrawl", "Remote Crawl Job", "thread that performes a single crawl/indexing step triggered by a remote peer",
new serverInstantThread(this, "remoteTriggeredCrawlJob", "remoteTriggeredCrawlJobSize"), 30000);
deployThread("61_globalcrawltrigger", "Global Crawl Trigger", "thread that triggeres remote peers for crawling",
new serverInstantThread(this, "limitCrawlTriggerJob", "limitCrawlTriggerJobSize"), 30000);
deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue",
new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), 10000);
new serverInstantThread(this, "coreCrawlJob", "coreCrawlJobSize"), 10000);
deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account",
new serverInstantThread(yc, "publishSeedList", null), 180000);
deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task",
@ -374,7 +376,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} catch (IOException e) {}
}
private void cleanProfiles() {
if (totalSize() > 0) return;
if (queueSize() > 0) return;
Iterator i = profiles.profiles(true);
plasmaCrawlProfile.entry entry;
try {
@ -428,12 +430,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logSystem("SWITCHBOARD SHUTDOWN TERMINATED");
}
/*
public int totalSize() {
return processStack.size() + cacheLoader.size() + noticeURL.stackSize();
}
*/
public int queueSize() {
return processStack.size();
return processStack.size() + cacheLoader.size() + noticeURL.stackSize();
}
public int lUrlSize() {
@ -463,7 +467,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do one processing step
log.logDebug("DEQUEUE: cacheManager=" + ((cacheManager.idle()) ? "idle" : "busy") +
", processStack=" + processStack.size() +
", localStackSize=" + noticeURL.localStackSize() +
", coreStackSize=" + noticeURL.coreStackSize() +
", limitStackSize=" + noticeURL.limitStackSize() +
", overhangStackSize=" + noticeURL.overhangStackSize() +
", remoteStackSize=" + noticeURL.remoteStackSize());
processResourceStack((plasmaHTCache.Entry) processStack.removeFirst());
return true;
@ -529,22 +535,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
public int localCrawlJobSize() {
return noticeURL.localStackSize();
// Reports the size of the core crawl stack of noticeURL.
// This method name is registered together with "coreCrawlJob" when the
// "50_localcrawl" thread is deployed.
// NOTE(review): presumably serverInstantThread calls it by name to learn how
// much work is pending - confirm.
public int coreCrawlJobSize() {
return noticeURL.coreStackSize();
}
public boolean localCrawlJob() {
if (noticeURL.localStackSize() == 0) {
//log.logDebug("LocalCrawl: queue is empty");
public boolean coreCrawlJob() {
if (noticeURL.coreStackSize() == 0) {
//log.logDebug("CoreCrawl: queue is empty");
return false;
}
if (processStack.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many processes in queue, dismissed (" +
log.logDebug("CoreCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
if (cacheLoader.size() >= crawlSlots) {
log.logDebug("LocalCrawl: too many loader in queue, dismissed (" +
log.logDebug("CoreCrawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
@ -562,17 +568,91 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
// do a local crawl (may start a global crawl)
plasmaCrawlNURL.entry nex = noticeURL.localPop();
processCrawling(nex, nex.initiator());
return true;
// do a local crawl
plasmaCrawlNURL.entry urlEntry = noticeURL.corePop();
if (urlEntry.url() == null) return false;
String profileHandle = urlEntry.profileHandle();
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
if (profile == null) {
log.logError("LOCALCRAWL[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
return false;
}
log.logDebug("LOCALCRAWL: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() +
", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() +
", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
return processLocalCrawling(urlEntry, profile);
}
// Reports the size of the limit stack of noticeURL.
// This method name is registered together with "limitCrawlTriggerJob" when the
// "61_globalcrawltrigger" thread is deployed.
// NOTE(review): presumably serverInstantThread calls it by name to learn how
// much work is pending - confirm.
public int limitCrawlTriggerJobSize() {
return noticeURL.limitStackSize();
}
public boolean limitCrawlTriggerJob() {
if (noticeURL.limitStackSize() == 0) {
//log.logDebug("LimitCrawl: queue is empty");
return false;
}
// if the server is busy, we do crawling more slowly
if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {}
// if crawling was paused we have to wait until we wer notified to continue
synchronized(this.crawlingPausedSync) {
if (this.crawlingIsPaused) {
try {
this.crawlingPausedSync.wait();
}
catch (InterruptedException e){ return false;}
}
}
public int globalCrawlJobSize() {
// start a global crawl, if possible
plasmaCrawlNURL.entry urlEntry = noticeURL.limitPop();
if (urlEntry.url() == null) return false;
String profileHandle = urlEntry.profileHandle();
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
if (profile == null) {
log.logError("REMOTECRAWLTRIGGER[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
return false;
}
log.logDebug("plasmaSwitchboard.limitCrawlTriggerJob: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() +
", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() +
", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
boolean tryRemote =
(profile.remoteIndexing()) /* granted */ &&
(urlEntry.initiator() != null) && (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) /* not proxy */ &&
((yacyCore.seedDB.mySeed.isSenior()) ||
(yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */;
if (tryRemote) {
boolean success = processRemoteCrawlTrigger(urlEntry);
if (success) return true;
}
// alternatively do a local crawl
if (processStack.size() >= crawlSlots) {
log.logDebug("LimitCrawl: too many processes in queue, dismissed (" +
"processStack=" + processStack.size() + ")");
return false;
}
if (cacheLoader.size() >= crawlSlots) {
log.logDebug("LimitCrawl: too many loader in queue, dismissed (" +
"cacheLoader=" + cacheLoader.size() + ")");
return false;
}
processLocalCrawling(urlEntry, profile);
return false;
}
// Reports the size of the remote stack of noticeURL.
// This method name is registered together with "remoteTriggeredCrawlJob" when
// the "62_remotetriggeredcrawl" thread is deployed.
// NOTE(review): presumably serverInstantThread calls it by name to learn how
// much work is pending - confirm.
public int remoteTriggeredCrawlJobSize() {
return noticeURL.remoteStackSize();
}
public boolean globalCrawlJob() {
public boolean remoteTriggeredCrawlJob() {
// work off crawl requests that had been placed by other peers to our crawl stack
// do nothing if either there are private processes to be done
@ -586,9 +666,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
"processStack=" + processStack.size() + ")");
return false;
}
if (noticeURL.localStackSize() > 0) {
if (noticeURL.coreStackSize() > 0) {
log.logDebug("GlobalCrawl: any local crawl is in queue, dismissed (" +
"localStackSize=" + noticeURL.localStackSize() + ")");
"coreStackSize=" + noticeURL.coreStackSize() + ")");
return false;
}
@ -606,9 +686,20 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
plasmaCrawlNURL.entry nex = noticeURL.remotePop();
processCrawling(nex, nex.initiator());
return true;
plasmaCrawlNURL.entry urlEntry = noticeURL.remotePop();
if (urlEntry.url() == null) return false;
String profileHandle = urlEntry.profileHandle();
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
if (profile == null) {
log.logError("REMOTETRIGGEREDCRAWL[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
return false;
}
log.logDebug("plasmaSwitchboard.remoteTriggeredCrawlJob: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() +
", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() +
", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
return processLocalCrawling(urlEntry, profile);
}
private void processResourceStack(plasmaHTCache.Entry entry) {
@ -687,7 +778,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
}
log.logInfo("CRAWL: ADDED " + c + " LINKS FROM " + entry.url.toString() +
", NEW CRAWL STACK SIZE IS " + noticeURL.localStackSize());
", NEW CRAWL STACK SIZE IS " + noticeURL.coreStackSize());
}
// create index
@ -839,6 +930,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// store information
boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
boolean global =
(profile.remoteIndexing()) /* granted */ &&
(currentdepth == profile.generalDepth()) /* leaf node */ &&
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
((yacyCore.seedDB.mySeed.isSenior()) ||
(yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */;
noticeURL.newEntry(initiatorHash, /* initiator, needed for p2p-feedback */
nexturl, /* url clear text string */
loadDate, /* load date */
@ -848,7 +946,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
currentdepth, /*depth so far*/
0, /*anchors, default value */
0, /*forkfactor, default value */
((local) ? 1 : 4) /*local/remote stack*/
((global) ? plasmaCrawlNURL.STACK_TYPE_LIMIT :
((local) ? plasmaCrawlNURL.STACK_TYPE_CORE : plasmaCrawlNURL.STACK_TYPE_REMOTE)) /*local/remote stack*/
);
return null;
@ -870,13 +969,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (u == null) return plasmaURL.dummyHash; else return u.toString();
}
private void processCrawling(plasmaCrawlNURL.entry urlEntry, String initiator) {
private void processCrawlingX(plasmaCrawlNURL.entry urlEntry, String initiator) {
if (urlEntry.url() == null) return;
String profileHandle = urlEntry.profileHandle();
//System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
if (profile == null) {
log.logError("CRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
log.logError("CRAWL[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
return;
}
log.logDebug("plasmaSwitchboard.processCrawling: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() +
@ -891,39 +991,41 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
(yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */;
if (tryRemote) {
boolean success = processGlobalCrawling(urlEntry);
if (!(success)) processLocalCrawling(urlEntry, profile, initiator);
boolean success = processRemoteCrawlTrigger(urlEntry);
if (!(success)) processLocalCrawling(urlEntry, profile);
} else {
processLocalCrawling(urlEntry, profile, initiator);
processLocalCrawling(urlEntry, profile);
}
}
private void processLocalCrawling(plasmaCrawlNURL.entry urlEntry, plasmaCrawlProfile.entry profile, String initiator) {
private boolean processLocalCrawling(plasmaCrawlNURL.entry urlEntry, plasmaCrawlProfile.entry profile) {
// work off one Crawl stack entry
if ((urlEntry == null) && (urlEntry.url() == null)) {
log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
return;
log.logInfo("LOCALCRAWL[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
return false;
}
cacheLoader.loadParallel(urlEntry.url(), urlEntry.referrerHash(), initiator, urlEntry.depth(), profile);
log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: enqueued for load " + urlEntry.url());
cacheLoader.loadParallel(urlEntry.url(), urlEntry.referrerHash(), urlEntry.initiator(), urlEntry.depth(), profile);
log.logInfo("LOCALCRAWL[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: enqueued for load " + urlEntry.url());
return true;
}
private boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) {
private boolean processRemoteCrawlTrigger(plasmaCrawlNURL.entry urlEntry) {
if (urlEntry == null) {
log.logInfo("GLOBALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
log.logInfo("REMOTECRAWLTRIGGER[" + noticeURL.coreStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null");
return false;
}
// are we qualified?
if ((yacyCore.seedDB.mySeed == null) ||
(yacyCore.seedDB.mySeed.isJunior())) {
log.logDebug("plasmaSwitchboard.processGlobalCrawling: no permission");
log.logDebug("plasmaSwitchboard.processRemoteCrawlTrigger: no permission");
return false;
}
// check url
if (urlEntry.url() == null) {
log.logDebug("ERROR: plasmaSwitchboard.processGlobalCrawling - url is null. name=" + urlEntry.name());
log.logDebug("ERROR: plasmaSwitchboard.processRemoteCrawlTrigger - url is null. name=" + urlEntry.name());
return false;
}
String nexturlString = urlEntry.url().toString();
@ -932,7 +1034,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// check remote crawl
yacySeed remoteSeed = yacyCore.dhtAgent.getCrawlSeed(urlhash);
if (remoteSeed == null) {
log.logDebug("plasmaSwitchboard.processGlobalCrawling: no remote crawl seed available");
log.logDebug("plasmaSwitchboard.processRemoteCrawlTrigger: no remote crawl seed available");
return false;
}
@ -960,13 +1062,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
yacyCore.peerActions.peerDeparture(remoteSeed);
return false;
} else try {
log.logDebug("plasmaSwitchboard.processGlobalCrawling: remoteSeed=" + remoteSeed.getName() + ", url=" + nexturlString + ", response=" + page.toString()); // DEBUG
log.logDebug("plasmaSwitchboard.processRemoteCrawlTrigger: remoteSeed=" + remoteSeed.getName() + ", url=" + nexturlString + ", response=" + page.toString()); // DEBUG
int newdelay = Integer.parseInt((String) page.get("delay"));
yacyCore.dhtAgent.setCrawlDelay(remoteSeed.hash, newdelay);
String response = (String) page.get("response");
if (response.equals("stacked")) {
log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay);
log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay);
return true;
} else if (response.equals("double")) {
String lurl = (String) page.get("lurl");
@ -974,19 +1076,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String propStr = crypt.simpleDecode(lurl, (String) page.get("key"));
plasmaCrawlLURL.entry entry = loadedURL.newEntry(propStr, true, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1);
noticeURL.remove(entry.hash());
log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'");
log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'");
return true;
} else {
log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " REJECTED. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + ")");
log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " REJECTED. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + ")");
return false;
}
} else {
log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " DENIED. RESPONSE=" + response + ", CAUSE=" + page.get("reason") + ", URL=" + nexturlString);
log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " DENIED. RESPONSE=" + response + ", CAUSE=" + page.get("reason") + ", URL=" + nexturlString);
return false;
}
} catch (Exception e) {
// wrong values
log.logError("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. CLIENT RETURNED: " + page.toString());
log.logError("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. CLIENT RETURNED: " + page.toString());
e.printStackTrace();
return false;
}
@ -1337,7 +1439,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
int transferred;
long starttime = System.currentTimeMillis();
try {
if ((totalSize() == 0) &&
if ((queueSize() == 0) &&
(getConfig("allowDistributeIndex", "false").equals("true")) &&
((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) {
indexCount = transferred;

@ -57,7 +57,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
private static final String newSingletonFileName = "indexAssortment001.db";
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentLimit = 50;
private static final int ramcacheLimit = 70;
private static final int ramcacheLimit = 51;
// class variables

@ -412,10 +412,12 @@ xpstopw=true
30_peerping_busysleep=120000
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
50_localcrawl_idlesleep=15000
50_localcrawl_idlesleep=10000
50_localcrawl_busysleep=0
60_globalcrawl_idlesleep=30000
60_globalcrawl_busysleep=3000
61_globalcrawltrigger_idlesleep=10000
61_globalcrawltrigger_busysleep=0
62_remotetriggeredcrawl_idlesleep=20000
62_remotetriggeredcrawl_busysleep=0
70_cachemanager_idlesleep=10000
70_cachemanager_busysleep=0
80_dequeue_idlesleep=10000

Loading…
Cancel
Save