Fix for the inverted ErrorCache.exists test (bug introduced in the latest commit)

pull/1/head
Michael Peter Christen 11 years ago
parent 09412ea3a4
commit 552ef9f18e

@@ -392,13 +392,14 @@ public final class CrawlStacker {
final String urlstring = url.toString();
// check if the url is double registered
+String urlhash = ASCII.String(url.hash());
final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
-final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash())); // TODO: combine the exists-query with this one
+final Date oldDate = this.indexSegment.fulltext().getLoadDate(urlhash); // TODO: combine the exists-query with this one
if (oldDate == null) {
if (dbocc != null) {
// do double-check
if (dbocc == HarvestProcess.ERRORS) {
-final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
+final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash);
return "double in: errors (" + errorEntry.getFailReason() + ")";
}
return "double in: " + dbocc.toString();
@@ -414,7 +415,7 @@ public final class CrawlStacker {
return "double in: LURL-DB, oldDate = " + oldDate.toString();
}
if (dbocc == HarvestProcess.ERRORS) {
-final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
+final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(urlhash);
if (CrawlStacker.log.isInfo()) CrawlStacker.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason());
return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString();
}

@@ -164,7 +164,7 @@ public class ErrorCache {
if (doc == null) return false;
// check if the document contains a value in the field CollectionSchema.failreason_s
Object failreason = doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
-return failreason == null || failreason.toString().length() == 0;
+return failreason != null && failreason.toString().length() > 0;
} catch (IOException e) {
return false;
}

Loading…
Cancel
Save