prevent crawl starts with very large URL lists from causing a time-out in the user front-end
pull/1/head
Michael Peter Christen 12 years ago
parent 98a4a4aa97
commit 33bc255e85

@@ -2778,6 +2778,7 @@ public final class Switchboard extends serverSwitch {
     }

     public void stackURLs(Set<DigestURI> rootURLs, final CrawlProfile profile, final Set<DigestURI> successurls, final Map<DigestURI,String> failurls) {
+        if (rootURLs == null || rootURLs.size() == 0) return;
         List<Thread> stackthreads = new ArrayList<Thread>(); // do this concurrently
         for (DigestURI url: rootURLs) {
             final DigestURI turl = url;
@@ -2789,11 +2790,12 @@ public final class Switchboard extends serverSwitch {
             };
             t.start();
             stackthreads.add(t);
+            try {Thread.sleep(10);} catch (InterruptedException e) {} // throttle so that this fires no more than 100 connections per second
         }
-        for (Thread t: stackthreads) try {t.join(5000);} catch (InterruptedException e) {}
+        long waitingtime = 1 + (30000 / rootURLs.size()); // wait at most about half a minute in total so that the crawl start does not run into a time-out
+        for (Thread t: stackthreads) try {t.join(waitingtime);} catch (InterruptedException e) {}
     }
     /**
      * stack the url to the crawler
      * @param profile
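
Taken together, the change bounds the crawl start in two directions: a 10 ms pause between thread starts throttles connection setup to roughly 100 per second, and the per-thread join timeout shrinks with the size of the URL list so that the total wait stays near 30 seconds instead of growing with the list. The following is a minimal, self-contained sketch of the same pattern; the class BoundedCrawlStart and the stackUrl() stand-in are hypothetical illustrations, not YaCy code, and only the throttle and timeout arithmetic mirror the commit.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class BoundedCrawlStart {

    // hypothetical stand-in for the per-URL work; in YaCy this would
    // resolve the host and push the URL onto the crawler stack
    static void stackUrl(String url) {
        try { Thread.sleep(200); } catch (InterruptedException e) { Thread.currentThread().interrupt(); }
    }

    static void stackUrls(Set<String> rootUrls) {
        if (rootUrls == null || rootUrls.isEmpty()) return;
        List<Thread> stackThreads = new ArrayList<Thread>();
        for (final String url : rootUrls) {
            Thread t = new Thread() {
                @Override public void run() { stackUrl(url); }
            };
            t.start();
            stackThreads.add(t);
            // throttle: a 10 ms gap between starts caps thread creation at ~100 per second
            try { Thread.sleep(10); } catch (InterruptedException e) {}
        }
        // spread a ~30 s budget over all joins so the total wait stays bounded
        // no matter how many URLs were submitted
        long waitingTime = 1 + (30000 / rootUrls.size());
        for (Thread t : stackThreads) {
            try { t.join(waitingTime); } catch (InterruptedException e) {}
        }
    }

    public static void main(String[] args) {
        Set<String> urls = new HashSet<String>();
        for (int i = 0; i < 50; i++) urls.add("http://example.com/page" + i);
        long start = System.currentTimeMillis();
        stackUrls(urls);
        System.out.println("stackUrls returned after " + (System.currentTimeMillis() - start) + " ms");
    }
}

With the old fixed t.join(5000) the worst case grew linearly with the list (five seconds per thread); with waitingtime = 1 + 30000 / rootURLs.size() the sequential joins add up to at most about 30 seconds plus one millisecond per URL, so even very large URL lists hand control back to the front-end in roughly half a minute.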
