diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index e2d72fe1b..f4458a560 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -24,7 +24,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.Writer; import java.net.MalformedURLException; -import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -56,7 +55,6 @@ import net.yacy.peers.NewsPool; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -387,16 +385,9 @@ public class Crawler_p { try {sb.crawlQueues.noticeURL.removeByProfileHandle(profile.handle(), 10000);} catch (final SpaceExceededException e1) {} // delete all error urls for that domain - List hosthashes = new ArrayList(); for (DigestURL u: rootURLs) { - hosthashes.add(ASCII.getBytes(u.hosthash())); - } - sb.crawlQueues.errorURL.removeHosts(hosthashes); - for (byte[] hosthash: hosthashes) { - try { - String deletequery = CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"; - sb.index.fulltext().getDefaultConnector().deleteByQuery(deletequery); - } catch (final IOException e) {ConcurrentLog.logException(e);} + sb.index.fulltext().remove(u.hash()); + sb.crawlQueues.errorURL.removeHost(ASCII.getBytes(u.hosthash())); } sb.index.fulltext().commit(true); diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index 3684117b3..34f9759e4 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -32,7 +32,6 @@ import java.net.MalformedURLException; import java.util.Date; -import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -128,7 +127,6 @@ public class QuickCrawlLink_p { final byte[] urlhash = crawlingStartURL.hash(); indexSegment.fulltext().remove(urlhash); sb.crawlQueues.noticeURL.removeByURLHash(urlhash); - sb.crawlQueues.errorURL.remove(ASCII.String(urlhash)); // create crawling profile CrawlProfile pe = null; diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index c6c903f0c..5ff4cbe5e 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -27,7 +27,6 @@ package net.yacy.crawler; import java.io.IOException; import java.net.InetAddress; import java.net.MalformedURLException; -import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Locale; @@ -185,9 +184,7 @@ public final class CrawlStacker { if (replace) { this.indexSegment.fulltext().remove(urlhash); byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6); - List hosthashes = new ArrayList(); hosthashes.add(hosthash); - this.nextQueue.errorURL.removeHosts(hosthashes); - this.nextQueue.removeURL(urlhash); + this.nextQueue.errorURL.removeHost(hosthash); String u = url.toNormalform(true); if (u.endsWith("/")) { u = u + "index.html"; @@ -198,7 +195,6 @@ public final class CrawlStacker { final byte[] uh = new DigestURL(u).hash(); this.indexSegment.fulltext().remove(uh); this.nextQueue.noticeURL.removeByURLHash(uh); - this.nextQueue.errorURL.remove(ASCII.String(uh)); } catch (final MalformedURLException e1) {} } @@ -246,7 +242,6 @@ public final class CrawlStacker { if (replace) { CrawlStacker.this.indexSegment.fulltext().remove(urlhash); cq.noticeURL.removeByURLHash(urlhash); - cq.errorURL.remove(ASCII.String(urlhash)); } // put entry on crawl stack diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index fc355d1ba..be18de809 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -119,11 +119,6 @@ public class CrawlQueues { this.workers.clear(); this.remoteCrawlProviderHashes.clear(); this.noticeURL.clear(); - try { - this.errorURL.clear(); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } this.delegatedURL.clear(); } @@ -154,7 +149,6 @@ public class CrawlQueues { assert hash != null && hash.length == 12; this.noticeURL.removeByURLHash(hash); this.delegatedURL.remove(hash); - this.errorURL.remove(ASCII.String(hash)); } public DigestURL getURL(final byte[] urlhash) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 170d5ef30..70cb1a180 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2878,9 +2878,8 @@ public final class Switchboard extends serverSwitch { // remove the document from the error-db byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6); - List hosthashes = new ArrayList(); hosthashes.add(hosthash); - this.crawlQueues.errorURL.removeHosts(hosthashes); - this.crawlQueues.removeURL(urlhash); + this.crawlQueues.errorURL.removeHost(hosthash); + this.index.fulltext().remove(urlhash); // get a scraper to get the title Document scraper; diff --git a/source/net/yacy/search/index/ErrorCache.java b/source/net/yacy/search/index/ErrorCache.java index e0ac6c42d..036386089 100644 --- a/source/net/yacy/search/index/ErrorCache.java +++ b/source/net/yacy/search/index/ErrorCache.java @@ -79,28 +79,14 @@ public class ErrorCache { this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); } - public void remove(final String hash) { - if (hash == null) return; - this.stack.remove(hash); + public void removeHost(final byte[] hosthash) { + if (hosthash == null) return; try { - this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + hash + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); - } catch (final IOException e) { - return; - } - } - - public void removeHosts(final Iterable hosthashes) { - if (hosthashes == null) return; - try { - for (byte[] hosthash : hosthashes) { - this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); - } + this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"); Iterator i = ErrorCache.this.stack.keySet().iterator(); while (i.hasNext()) { String b = i.next(); - for (byte[] hosthash : hosthashes) { - if (NaturalOrder.naturalOrder.equal(hosthash, 0, ASCII.getBytes(b), 6, 6)) i.remove(); - } + if (NaturalOrder.naturalOrder.equal(hosthash, 0, ASCII.getBytes(b), 6, 6)) i.remove(); } } catch (final IOException e) { }