Fixed removal of URLs from the delegatedURL remote crawl stack

URLs were removed from the stack using their hash as a byte array,
whereas the hash is stored in the stack as a String instance, so the removal never matched an entry.
pull/186/head
luccioman 7 years ago
parent 2bdd71de60
commit c726154a59

@ -147,7 +147,7 @@ public final class crawlReceipt {
// put new entry into database
sb.index.fulltext().putMetadata(entry);
ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash())); // the delegated work has been done
if (log.isInfo()) log.info("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false));
// ready for more
@ -160,7 +160,7 @@ public final class crawlReceipt {
}
if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case
sb.crawlQueues.delegatedURL.remove(entry.hash());
sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash()));
sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
}
//switchboard.noticeURL.remove(receivedUrlhash);

@ -82,6 +82,8 @@ public class CrawlQueues {
public NoticedURL noticeURL;
public ErrorCache errorURL;
/** URLs pulled by remote peers in order to crawl them for us */
public Map<String, DigestURL> delegatedURL;
public CrawlQueues(final Switchboard sb, final File queuePath) {
@ -107,7 +109,7 @@ public class CrawlQueues {
if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList<String>();
if (this.delegatedURL == null) {
this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
log.config("Finishted Startup of Crawling Management");
log.config("Finished Startup of Crawling Management");
}
}
/**
@ -205,7 +207,9 @@ public class CrawlQueues {
public void removeURL(final byte[] hash) {
assert hash != null && hash.length == 12;
this.noticeURL.removeByURLHash(hash);
if (this.delegatedURL != null) this.delegatedURL.remove(hash);
if (this.delegatedURL != null) {
this.delegatedURL.remove(ASCII.String(hash));
}
}
public int removeHosts(final Set<String> hosthashes) {

Loading…
Cancel
Save