Fixed removal of URLs from the delegatedURL remote crawl stack

URLs were removed from the stack using their hash as a byte array,
whereas the hash is stored in the stack as a String instance.
pull/186/head
luccioman 7 years ago
parent 2bdd71de60
commit c726154a59

@@ -147,7 +147,7 @@ public final class crawlReceipt {
// put new entry into database // put new entry into database
sb.index.fulltext().putMetadata(entry); sb.index.fulltext().putMetadata(entry);
ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash())); // the delegated work has been done
if (log.isInfo()) log.info("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false)); if (log.isInfo()) log.info("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false));
// ready for more // ready for more
@@ -160,7 +160,7 @@ public final class crawlReceipt {
} }
if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case
sb.crawlQueues.delegatedURL.remove(entry.hash()); sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash()));
sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1); sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
} }
//switchboard.noticeURL.remove(receivedUrlhash); //switchboard.noticeURL.remove(receivedUrlhash);

@@ -82,6 +82,8 @@ public class CrawlQueues {
public NoticedURL noticeURL; public NoticedURL noticeURL;
public ErrorCache errorURL; public ErrorCache errorURL;
/** URLs pulled by remote peers in order to crawl them for us */
public Map<String, DigestURL> delegatedURL; public Map<String, DigestURL> delegatedURL;
public CrawlQueues(final Switchboard sb, final File queuePath) { public CrawlQueues(final Switchboard sb, final File queuePath) {
@@ -107,7 +109,7 @@ public class CrawlQueues {
if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList<String>(); if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList<String>();
if (this.delegatedURL == null) { if (this.delegatedURL == null) {
this.delegatedURL = new ConcurrentHashMap<String, DigestURL>(); this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
log.config("Finishted Startup of Crawling Management"); log.config("Finished Startup of Crawling Management");
} }
} }
/** /**
@@ -205,7 +207,9 @@ public class CrawlQueues {
public void removeURL(final byte[] hash) { public void removeURL(final byte[] hash) {
assert hash != null && hash.length == 12; assert hash != null && hash.length == 12;
this.noticeURL.removeByURLHash(hash); this.noticeURL.removeByURLHash(hash);
if (this.delegatedURL != null) this.delegatedURL.remove(hash); if (this.delegatedURL != null) {
this.delegatedURL.remove(ASCII.String(hash));
}
} }
public int removeHosts(final Set<String> hosthashes) { public int removeHosts(final Set<String> hosthashes) {

Loading…
Cancel
Save