check error URL cache before adding errorDoc to index

- delete the obsolete, related SwitchboardConstants entry (RAM_CACHE_EURL_TIME)
pull/38/head
reger 9 years ago
parent 9636a74633
commit a58d34a4e8

@ -189,11 +189,6 @@ public final class SwitchboardConstants {
* <p>Name of the setting how much memory in bytes should be assigned to the Noticed URLs DB for caching purposes</p> * <p>Name of the setting how much memory in bytes should be assigned to the Noticed URLs DB for caching purposes</p>
*/ */
public static final String RAM_CACHE_NURL_TIME = "ramCacheNURL_time"; public static final String RAM_CACHE_NURL_TIME = "ramCacheNURL_time";
/**
* <p><code>public static final String <strong>RAM_CACHE_EURL</strong> = "ramCacheEURL"</code></p>
* <p>Name of the setting how much memory in bytes should be assigned to the Erroneous URLs DB for caching purposes</p>
*/
public static final String RAM_CACHE_EURL_TIME = "ramCacheEURL_time";
/** /**
* <p><code>public static final String <strong>RAM_CACHE_RWI</strong> = "ramCacheRWI"</code></p> * <p><code>public static final String <strong>RAM_CACHE_RWI</strong> = "ramCacheRWI"</code></p>
* <p>Name of the setting how much memory in bytes should be assigned to the RWIs DB for caching purposes</p> * <p>Name of the setting how much memory in bytes should be assigned to the RWIs DB for caching purposes</p>

@ -106,12 +106,25 @@ public class ErrorCache {
} }
} }
/**
* Adds an error document to the Solr index (marked as failed by httpstatus_i <> 200)
* and caches recently added failed docs (up to maxStackSize = 1000)
*
* @param url failed url
* @param crawldepth info crawldepth
* @param profile info of collection
* @param failCategory if failCategory.store is set the document is stored to the index, otherwise it is cached only
* @param anycause info cause-string
* @param httpcode http response code
*/
public void push(final DigestURL url, final int crawldepth, final CrawlProfile profile, final FailCategory failCategory, String anycause, final int httpcode) { public void push(final DigestURL url, final int crawldepth, final CrawlProfile profile, final FailCategory failCategory, String anycause, final int httpcode) {
// assert executor != null; // null == proxy ! // assert executor != null; // null == proxy !
assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name(); assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name();
if (anycause == null) anycause = "unknown"; if (anycause == null) anycause = "unknown";
final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : ""); final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
if (!reason.startsWith("double")) log.info(url.toNormalform(true) + " - " + reason); if (!reason.startsWith("double")) log.info(url.toNormalform(true) + " - " + reason);
if (!this.cache.containsKey(ASCII.String(url.hash()))) { // no further action if in error-cache
CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc( CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(
url, profile == null ? null : profile.collections(), url, profile == null ? null : profile.collections(),
failCategory.name() + " " + reason, failCategory.failType, failCategory.name() + " " + reason, failCategory.failType,
@ -136,6 +149,7 @@ public class ErrorCache {
} }
checkStackSize(); checkStackSize();
} }
}
private void checkStackSize() { private void checkStackSize() {
synchronized (this.cache) { synchronized (this.cache) {

Loading…
Cancel
Save