- let crawl loader jobs die after 10 seconds without new jobs

- corrected shutdown order to prevent a deadlock during shutdown
pull/1/head
Michael Peter Christen 11 years ago
parent 0792a4369c
commit e485fbd0ce

@ -758,6 +758,7 @@ public class Domains {
//if (!matchesList(host, nameCacheNoCachingPatterns)) System.out.println("DNSLOOKUP " + host);
try {
//final long t = System.currentTimeMillis();
String oldName = Thread.currentThread().getName();
Thread.currentThread().setName("Domains: DNS resolve of '" + host + "'"); // thread dump show which host is resolved
if (InetAddresses.isInetAddress(host)) {
try {
@ -767,6 +768,7 @@ public class Domains {
ip = null;
}
}
Thread.currentThread().setName(oldName);
if (ip == null) try {
ip = timeLimiter.callWithTimeout(new Callable<InetAddress>() {
@Override

@ -36,6 +36,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -102,6 +103,8 @@ public class CrawlQueues {
}
public synchronized void close() {
// removed pending requests
this.workerQueue.clear();
// wait for all workers to finish
for (int i = 0; i < this.worker.length; i++) {
try {this.workerQueue.put(POISON_REQUEST);} catch (InterruptedException e) {}
@ -124,7 +127,6 @@ public class CrawlQueues {
// wait for all workers to finish
this.workerQueue.clear();
for (final Loader w: this.worker) w.interrupt();
for (final Loader w: this.worker) try {w.join(10);} catch (final InterruptedException e1) {}
this.remoteCrawlProviderHashes.clear();
this.noticeURL.clear();
this.delegatedURL.clear();
@ -615,7 +617,8 @@ public class CrawlQueues {
public void run() {
this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
try {
while ((request = CrawlQueues.this.workerQueue.take()) != POISON_REQUEST) {
while ((request = CrawlQueues.this.workerQueue.poll(10, TimeUnit.SECONDS)) != POISON_REQUEST) {
if (request == null) break; // we run this only for a specific time and then let the process die to clear up resources
request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
this.setName("CrawlQueues.Loader(" + request.url() + ")");
CrawlProfile profile = CrawlQueues.this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
@ -672,7 +675,9 @@ public class CrawlQueues {
request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED);
} finally {
request = null;
this.setName("CrawlQueues.Loader(WAITING)");
}
profile = null;
}
} catch (InterruptedException e2) {
ConcurrentLog.logException(e2);

@ -1719,6 +1719,7 @@ public final class Switchboard extends serverSwitch {
// closing all still running db importer jobs
this.crawlStacker.announceClose();
this.crawlStacker.close();
this.crawlQueues.close();
this.indexingDocumentProcessor.shutdown();
this.indexingCondensementProcessor.shutdown();
this.indexingAnalysisProcessor.shutdown();
@ -1736,7 +1737,6 @@ public final class Switchboard extends serverSwitch {
}
this.messageDB.close();
this.webStructure.close();
this.crawlQueues.close();
this.crawler.close();
this.log.config("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager (stand by...)");
this.index.close();

Loading…
Cancel
Save