|
|
@ -997,32 +997,34 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// do a local crawl
|
|
|
|
// do a local crawl
|
|
|
|
String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
|
|
|
|
|
|
|
|
plasmaCrawlNURL.Entry urlEntry = null;
|
|
|
|
plasmaCrawlNURL.Entry urlEntry = null;
|
|
|
|
try {
|
|
|
|
while (urlEntry == null && urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) > 0) {
|
|
|
|
urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
|
|
|
|
String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
|
|
|
|
String profileHandle = urlEntry.profileHandle();
|
|
|
|
try {
|
|
|
|
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
|
|
|
|
urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
|
|
|
|
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
|
|
|
|
String profileHandle = urlEntry.profileHandle();
|
|
|
|
if (profileHandle == null) {
|
|
|
|
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
|
|
|
|
log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
|
|
|
|
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
|
|
|
|
return true;
|
|
|
|
if (profileHandle == null) {
|
|
|
|
}
|
|
|
|
log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
|
|
|
|
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
|
|
|
|
return true;
|
|
|
|
if (profile == null) {
|
|
|
|
}
|
|
|
|
log.logSevere(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
|
|
|
|
plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle);
|
|
|
|
|
|
|
|
if (profile == null) {
|
|
|
|
|
|
|
|
log.logSevere(stats + ": LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url());
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
log.logFine("LOCALCRAWL: URL=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter()
|
|
|
|
|
|
|
|
+ ", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
processLocalCrawling(urlEntry, profile, stats);
|
|
|
|
return true;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
log.logFine("LOCALCRAWL: URL=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter()
|
|
|
|
|
|
|
|
+ ", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
processLocalCrawling(urlEntry, profile, stats);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage());
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public int limitCrawlTriggerJobSize() {
|
|
|
|
public int limitCrawlTriggerJobSize() {
|
|
|
|