Skip the loader wait cycle on concurrent access in the NOCACHE configuration.

In the NOCACHE configuration the resource is always loaded online, so there is no benefit in waiting for a faster cache hit.
pull/1/head
reger 11 years ago
parent 851e96cf6e
commit 48aed15c48

@ -148,10 +148,22 @@ public final class LoaderDispatcher {
return load(request, cacheStrategy, protocolMaxFileSize(request.url()), blacklistType, agent);
}
/**
* Loads a resource from the cache or from web/ftp/smb/file.
* If a load of the same URL is already in progress, waits at most 5 seconds for that loader to fill the cache (this wait is skipped for CacheStrategy.NOCACHE).
*
* @param request the request essentials
* @param cacheStrategy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY
* @param maxFileSize
* @param blacklistType
* @param agent
* @return the loaded entity in a Response object
* @throws IOException
*/
public Response load(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
Semaphore check = this.loaderSteering.get(request.url());
if (check != null) {
// a loading process may be going on for that url
Semaphore check = this.loaderSteering.get(request.url());
if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
// a loading process is going on for that url
//ConcurrentLog.info("LoaderDispatcher", "waiting for " + request.url().toNormalform(true));
long t = System.currentTimeMillis();
try { check.tryAcquire(5, TimeUnit.SECONDS);} catch (final InterruptedException e) {}
@ -163,15 +175,14 @@ public final class LoaderDispatcher {
this.loaderSteering.put(request.url(), new Semaphore(0));
try {
final Response response = loadInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
check = this.loaderSteering.remove(request.url());
if (check != null) check.release(1000);
// finally block cleans up loaderSteering and semaphore
return response;
} catch (final Throwable e) {
} catch (final Throwable e) {
throw new IOException(e);
} finally {
// release the semaphore anyway
check = this.loaderSteering.remove(request.url());
if (check != null) check.release(1000);
check = this.loaderSteering.remove(request.url()); // = next caller goes directly to loadInternal (is ok we just wanted to fill cash)
if (check != null) check.release(1000); // don't block any other
}
}
@ -190,8 +201,8 @@ public final class LoaderDispatcher {
final String host = url.getHost();
final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
// check if url is in blacklist
if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
// check if url is in blacklist
if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
}

Loading…
Cancel
Save