Added information about the reason for pausing crawls

pull/1/head
Michael Peter Christen 13 years ago
parent 2371ef031c
commit 15d1460b40

@ -63,7 +63,7 @@ public class Crawler_p {
// this servlet does NOT create the Crawler servlet page content!
// this servlet starts a web crawl. The interface for entering the web crawl parameters is in IndexCreate_p.html
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
// inital values for AJAX Elements (without JavaScript)
@ -107,9 +107,9 @@ public class Crawler_p {
// pause queue
final String queue = post.get("pause", "");
if ("localcrawler".equals(queue)) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, "user request in Crawler_p from " + header.refererHost());
} else if ("remotecrawler".equals(queue)) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "user request in Crawler_p from " + header.refererHost());
}
}
@ -392,7 +392,6 @@ public class Crawler_p {
// stack requests
sb.crawler.putActive(handle, profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
final Set<DigestURI> successurls = new HashSet<DigestURI>();
final Map<DigestURI,String> failurls = new HashMap<DigestURI, String>();
sb.stackURLs(rootURLs, profile, successurls, failurls);
@ -489,7 +488,6 @@ public class Crawler_p {
}
sb.crawler.putActive(handle, profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
sb.crawlStacker.enqueueEntriesAsynchronous(sb.peers.mySeed().hash.getBytes(), profile.handle(), hyperlinks);
} catch (final PatternSyntaxException e) {
prop.put("info", "4"); // crawlfilter does not match url

@ -84,9 +84,9 @@ public class Status
} else if ( post.containsKey("pauseCrawlJob") ) {
final String jobType = post.get("jobType");
if ( "localCrawl".equals(jobType) ) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, "user demand on Status.html");
} else if ( "remoteTriggeredCrawl".equals(jobType) ) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "user demand on Status.html");
}
redirect = true;
} else if ( post.containsKey("continueCrawlJob") ) {

@ -66,14 +66,16 @@ public class ResourceObserver {
this.normalizedMemoryFree = getNormalizedMemoryFree();
if (this.normalizedDiskFree.compareTo(Space.HIGH) < 0 || this.normalizedMemoryFree.compareTo(Space.HIGH) < 0 ) {
String reason = "";
if (this.normalizedDiskFree.compareTo(Space.HIGH) < 0) reason += " not enough disk space, " + this.path.getUsableSpace();
if (this.normalizedMemoryFree.compareTo(Space.HIGH) < 0 ) reason += " not enough memory space";
if (!this.sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
log.logInfo("pausing local crawls");
this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, "resource observer:" + reason);
}
if (!this.sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
log.logInfo("pausing remote triggered crawls");
this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
this.sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "resource observer:" + reason);
}
if ((this.normalizedDiskFree == Space.LOW || this.normalizedMemoryFree.compareTo(Space.HIGH) < 0) && this.sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, false)) {

@ -1189,11 +1189,11 @@ public final class Switchboard extends serverSwitch {
// pause crawls
final boolean lcp = crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
if ( !lcp ) {
pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, "network switch to " + networkDefinition);
}
final boolean rcp = crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
if ( !rcp ) {
pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, "network switch to " + networkDefinition);
}
// trigger online caution
this.proxyLastAccess = System.currentTimeMillis() + 3000; // at least 3 seconds online caution to prevent unnecessary action on database meanwhile
@ -2269,12 +2269,14 @@ public final class Switchboard extends serverSwitch {
*
* @param jobType
*/
public void pauseCrawlJob(final String jobType) {
public void pauseCrawlJob(final String jobType, String cause) {
final Object[] status = this.crawlJobsStatus.get(jobType);
synchronized ( status[SwitchboardConstants.CRAWLJOB_SYNC] ) {
status[SwitchboardConstants.CRAWLJOB_STATUS] = Boolean.TRUE;
}
setConfig(jobType + "_isPaused", "true");
setConfig(jobType + "_isPaused_cause", "cause");
log.logWarning("Crawl job '" + jobType + "' is paused: " + cause);
}
/**
@ -2728,7 +2730,6 @@ public final class Switchboard extends serverSwitch {
if (url.isFTP()) {
try {
this.crawler.putActive(handle, profile);
this.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
this.crawlStacker.enqueueEntriesFTP(this.peers.mySeed().hash.getBytes(), profile.handle(), url.getHost(), url.getPort(), false);
return null;
} catch (final Exception e) {

@ -368,10 +368,11 @@ public class Segment {
try {
this.fulltext.putDocument(solrInputDoc);
} catch ( final IOException e ) {
Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr, pausing Crawler! - " + e.getMessage());
String error = "failed to send " + urlNormalform + " to solr";
Log.logWarning("SOLR", error + ", pausing Crawler! - " + e.getMessage());
// pause the crawler!!!
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL, error);
Switchboard.getSwitchboard().pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, error);
}
final long storageEndTime = System.currentTimeMillis();

Loading…
Cancel
Save