From c726154a59994137053d3c557a73e18315dcbbfc Mon Sep 17 00:00:00 2001 From: luccioman Date: Thu, 5 Jul 2018 09:36:36 +0200 Subject: [PATCH] Fixed removal of URLs from the delegatedURL remote crawl stack URLs were removed from the stack using their hash as a byte array, whereas the hash is stored in the stack as a String instance. --- htroot/yacy/crawlReceipt.java | 4 ++-- source/net/yacy/crawler/data/CrawlQueues.java | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index 553b6bf6a..9bb321f96 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -147,7 +147,7 @@ public final class crawlReceipt { // put new entry into database sb.index.fulltext().putMetadata(entry); ResultURLs.stack(ASCII.String(entry.url().hash()), entry.url().getHost(), youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS); - sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done + sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash())); // the delegated work has been done if (log.isInfo()) log.info("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false)); // ready for more @@ -160,7 +160,7 @@ public final class crawlReceipt { } if (sb.crawlQueues.delegatedURL != null) { // the delegated work is transformed into an error case - sb.crawlQueues.delegatedURL.remove(entry.hash()); + sb.crawlQueues.delegatedURL.remove(ASCII.String(entry.hash())); sb.crawlQueues.errorURL.push(entry.url(), 997, null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1); } //switchboard.noticeURL.remove(receivedUrlhash); diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 0425ecc50..8c0fb2d20 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -82,6 
+82,8 @@ public class CrawlQueues { public NoticedURL noticeURL; public ErrorCache errorURL; + + /** URLs pulled by remote peers in order to crawl them for us */ public Map delegatedURL; public CrawlQueues(final Switchboard sb, final File queuePath) { @@ -107,7 +109,7 @@ public class CrawlQueues { if (this.remoteCrawlProviderHashes == null) this.remoteCrawlProviderHashes = new ArrayList(); if (this.delegatedURL == null) { this.delegatedURL = new ConcurrentHashMap(); - log.config("Finishted Startup of Crawling Management"); + log.config("Finished Startup of Crawling Management"); } } /** @@ -205,7 +207,9 @@ public class CrawlQueues { public void removeURL(final byte[] hash) { assert hash != null && hash.length == 12; this.noticeURL.removeByURLHash(hash); - if (this.delegatedURL != null) this.delegatedURL.remove(hash); + if (this.delegatedURL != null) { + this.delegatedURL.remove(ASCII.String(hash)); + } } public int removeHosts(final Set hosthashes) {