From 89c0aa0e74b4ce4b17c96b37ad29d37e24cb3695 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Wed, 17 Jul 2013 15:20:56 +0200
Subject: [PATCH] added collection_sxt to error documents

---
 htroot/Crawler_p.java                         |  1 +
 htroot/yacy/crawlReceipt.java                 |  1 +
 htroot/yacy/urls.java                         |  1 +
 source/net/yacy/crawler/CrawlStacker.java     |  3 +-
 source/net/yacy/crawler/data/CrawlQueues.java |  8 +++--
 source/net/yacy/crawler/data/ZURL.java        |  3 +-
 .../net/yacy/crawler/retrieval/FTPLoader.java |  4 +--
 .../yacy/crawler/retrieval/HTTPLoader.java    | 29 +++++++++----------
 .../net/yacy/repository/LoaderDispatcher.java |  8 ++---
 source/net/yacy/search/Switchboard.java       | 11 ++++++-
 .../schema/CollectionConfiguration.java       |  4 ++-
 .../net/yacy/search/snippet/MediaSnippet.java |  2 +-
 12 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 97db138a9..b0733b786 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -444,6 +444,7 @@ public class Crawler_p {
                                 0,
                                 0,
                                 0),
+                            null,
                             sb.peers.mySeed().hash.getBytes(),
                             new Date(),
                             1,
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 2b1fcbf25..d83aa38d6 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -163,6 +163,7 @@ public final class crawlReceipt {
         sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
         sb.crawlQueues.errorURL.push(
                 entry.toBalancerEntry(iam),
+                null,
                 youare.getBytes(),
                 null,
                 0,
diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java
index 6a9c6667f..3fa8cdd00 100644
--- a/htroot/yacy/urls.java
+++ b/htroot/yacy/urls.java
@@ -82,6 +82,7 @@ public class urls {
                     // place url to notice-url db
                     sb.crawlQueues.delegatedURL.push(
                         entry,
+                        null,
                         sb.peers.mySeed().hash.getBytes(),
                         new Date(),
                         0,
diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java
index f785524f6..cf61e928c 100644
--- a/source/net/yacy/crawler/CrawlStacker.java
+++ b/source/net/yacy/crawler/CrawlStacker.java
@@ -149,7 +149,8 @@ public final class CrawlStacker {
 
                 // if the url was rejected we store it into the error URL db
                 if (rejectReason != null) {
-                    this.nextQueue.errorURL.push(entry, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
+                    final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
+                    this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
                 }
             } catch (final Exception e) {
                 CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java
index 4ca3ed039..5c276a89a 100644
--- a/source/net/yacy/crawler/data/CrawlQueues.java
+++ b/source/net/yacy/crawler/data/CrawlQueues.java
@@ -612,6 +612,7 @@ public class CrawlQueues {
         private Request request;
         private final Integer code;
         private final long start;
+        private final CrawlProfile profile;
 
         private Loader(final Request entry) {
             this.start = System.currentTimeMillis();
@@ -619,6 +620,7 @@ public class CrawlQueues {
             this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
             this.code = Integer.valueOf(entry.hashCode());
             this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
+            this.profile = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
         }
 
         private long age() {
@@ -637,6 +639,7 @@ public class CrawlQueues {
                     //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,
@@ -652,8 +655,7 @@ public class CrawlQueues {
                 // returns null if everything went fine, a fail reason string if a problem occurred
                 try {
                     this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
-                    final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
-                    final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
+                    final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     if (response == null) {
                         this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                         if (CrawlQueues.this.log.isFine()) {
@@ -677,6 +679,7 @@ public class CrawlQueues {
                 if (result != null) {
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,
@@ -690,6 +693,7 @@ public class CrawlQueues {
             } catch (final Exception e) {
                 CrawlQueues.this.errorURL.push(
                         this.request,
+                        profile,
                         ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                         new Date(),
                         1,
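The null-guard above recurs at every call site this patch touches: a request's
profile handle may be null, and getActive() may return null for a handle that
has already left the active set. A minimal sketch of the shared pattern (the
helper name resolveProfile is invented; the types and the getActive() lookup
are taken from the hunks above):

    // Sketch only, not part of the patch: callers must tolerate a null
    // CrawlProfile and fall back to defaults; the error document then
    // simply carries no collection_sxt values.
    private CrawlProfile resolveProfile(final Request request) {
        final String handle = request.profileHandle();
        return handle == null ? null : this.sb.crawler.getActive(UTF8.getBytes(handle));
    }
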
diff --git a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java
index 9740c4525..8a182955e 100644
--- a/source/net/yacy/crawler/data/ZURL.java
+++ b/source/net/yacy/crawler/data/ZURL.java
@@ -172,6 +172,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
 
     public void push(
             final Request bentry,
+            final CrawlProfile profile,
             final byte[] executor,
             final Date workdate,
             final int workcount,
@@ -190,7 +191,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
         if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
             // send the error to solr
             try {
-                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
+                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
                 this.fulltext.getDefaultConnector().add(errorDoc);
             } catch (final IOException e) {
                 ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());
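For orientation, the error document that err() builds now has roughly this
shape (field names are the CollectionSchema members used in the err() hunk
below; the URL, hash, and collection values are invented for the example):

    // Illustrative only: approximate error document after this patch.
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "AAAAAAAAAAAA");                     // hash of the failed URL
    doc.addField("sku", "http://example.com/blocked.html"); // normalized URL
    doc.addField("failreason_s", "FINAL_LOAD_CONTEXT url in blacklist");
    doc.addField("failtype_s", "fail");                     // FailType.name()
    doc.addField("httpstatus_i", -1);
    doc.addField("collection_sxt", Arrays.asList("user", "intranet")); // new in this patch
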
diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java
index 8d2523b97..080117d01 100644
--- a/source/net/yacy/crawler/retrieval/FTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/FTPLoader.java
@@ -101,6 +101,7 @@ public class FTPLoader {
         // create new ftp client
         final FTPClient ftpClient = new FTPClient();
+        final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
 
         // get a connection
         if (openConnection(ftpClient, entryUrl)) {
             // test if the specified file is a directory
@@ -130,7 +131,6 @@ public class FTPLoader {
                     final ResponseHeader responseHeader = new ResponseHeader(200);
                     responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
                     responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
-                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
                     response = new Response(
                             request,
                             requestHeader,
@@ -156,7 +156,7 @@ public class FTPLoader {
         if (berr.size() > 0 || response == null) {
             // some error logging
             final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
-            this.sb.crawlQueues.errorURL.push(request, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
             throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
         }
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index eed260701..0dbe94568 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -70,20 +70,20 @@ public final class HTTPLoader {
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000);
     }
 
-    public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    public Response load(final Request entry, CrawlProfile profile, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
         Latency.updateBeforeLoad(entry.url());
         final long start = System.currentTimeMillis();
-        final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
+        final Response doc = load(entry, profile, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
         Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
         return doc;
     }
 
-    private Response load(final Request request, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
 
         byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash);
 
         if (retryCount < 0) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
             throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 
@@ -99,7 +99,7 @@ public final class HTTPLoader {
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
@@ -146,7 +146,7 @@ public final class HTTPLoader {
             redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
 
             if (redirectionUrlString.isEmpty()) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
                 throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             }
 
@@ -160,32 +160,32 @@ public final class HTTPLoader {
             this.sb.webStructure.generateCitationReference(url, redirectionUrl);
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
             }
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
                 // if we are already doing a shutdown we don't need to retry crawling
                 if (Thread.currentThread().isInterrupted()) {
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
                     throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                 }
 
                 // check if the url was already loaded
                 if (Cache.has(redirectionUrl.hash())) { // customer request
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
                     throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
                 }
 
                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
-                return load(request, retryCount - 1, maxFileSize, blacklistType, timeout);
+                return load(request, profile, retryCount - 1, maxFileSize, blacklistType, timeout);
             }
             // we don't want to follow redirects
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
             throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (responseBody == null) {
             // no response, reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
             throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (statusCode == 200 || statusCode == 203) {
             // the transfer is ok
@@ -196,12 +196,11 @@ public final class HTTPLoader {
 
             // check length again in case it was not possible to get the length before loading
             if (maxFileSize >= 0 && contentLength > maxFileSize) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
                 throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
             }
 
             // create a new cache entry
-            final CrawlProfile profile = request.profileHandle() == null ? null : this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
             response = new Response(
                     request,
                     requestHeader,
@@ -214,7 +213,7 @@ public final class HTTPLoader {
             return response;
         } else {
             // if the response has not the right response type then reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
             throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         }
     }
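A note on the design: HTTPLoader receives the CrawlProfile as a parameter
instead of resolving it itself (as FTPLoader does) because its private load()
recurses once per redirect hop; threading the profile through the recursion
means every failure branch, including the terminal "retry counter exceeded"
error, can tag its error document, while the handle is resolved only once by
the caller. A toy model of that pattern (self-contained, not YaCy code):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    public final class RedirectDemo {
        private static final Map<String, String> REDIRECTS = new HashMap<String, String>();
        static { REDIRECTS.put("/a", "/b"); REDIRECTS.put("/b", "/c"); }

        // The context object (here a plain String standing in for CrawlProfile)
        // rides along so the terminal failure still knows where it came from.
        static String fetch(final String url, final String profile, final int retriesLeft) throws IOException {
            if (retriesLeft < 0) throw new IOException("retry counter exceeded for " + url + " (profile: " + profile + ")");
            final String target = REDIRECTS.get(url);
            return target == null ? "content of " + url : fetch(target, profile, retriesLeft - 1);
        }

        public static void main(final String[] args) throws IOException {
            System.out.println(fetch("/a", "intranet", 2)); // follows /a -> /b -> /c
        }
    }
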
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 603cff76c..c8c3d0037 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -186,15 +186,15 @@ public final class LoaderDispatcher {
         if (url.isFile() || url.isSMB()) cacheStrategy = CacheStrategy.NOCACHE; // load just from the file system
         final String protocol = url.getProtocol();
         final String host = url.getHost();
-
+        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
+
         // check if url is in blacklist
         if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
-            this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, crawlProfile, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
         // check if we have the page in the cache
-        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
         if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) {
             // we have passed a first test if caching is allowed
             // now see if there is a cache entry
@@ -280,7 +280,7 @@ public final class LoaderDispatcher {
         // load resource from the internet
         Response response = null;
         if (protocol.equals("http") || protocol.equals("https")) {
-            response = this.httpLoader.load(request, maxFileSize, blacklistType, timeout);
+            response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, timeout);
         } else if (protocol.equals("ftp")) {
             response = this.ftpLoader.load(request, true);
         } else if (protocol.equals("smb")) {
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 5900c4357..a88706604 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1803,6 +1803,7 @@ public final class Switchboard extends serverSwitch {
             //if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 (referrerURL == null) ? null : referrerURL.hash(),
                 response.initiator(),
                 response.name(),
@@ -2474,6 +2475,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("the resource '" + response.url() + "' is missing in the cache.");
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 response.referrerHash(),
                 response.initiator(),
                 response.name(),
@@ -2498,6 +2500,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 response.referrerHash(),
                 response.initiator(),
                 response.name(),
@@ -2597,6 +2600,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2612,6 +2616,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule");
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2624,6 +2629,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2707,6 +2713,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
             addURLtoErrorDB(
                 url,
+                profile,
                 (referrerURL == null) ? null : referrerURL.hash(),
                 queueEntry.initiator(),
                 dc_title,
@@ -2719,6 +2726,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
             addURLtoErrorDB(
                 url,
+                profile,
                 (referrerURL == null) ? null : referrerURL.hash(),
                 queueEntry.initiator(),
                 dc_title,
@@ -3361,6 +3369,7 @@ public final class Switchboard extends serverSwitch {
 
     private void addURLtoErrorDB(
         final DigestURI url,
+        final CrawlProfile profile,
         final byte[] referrerHash,
         final byte[] initiator,
         final String name,
@@ -3380,7 +3389,7 @@ public final class Switchboard extends serverSwitch {
                 0,
                 0,
                 0);
-        this.crawlQueues.errorURL.push(bentry, initiator, new Date(), 0, failCategory, failreason, -1);
+        this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
     }
 
     public final void heuristicSite(final SearchEvent searchEvent, final String host) {
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index c6dab8b62..afd57ac64 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -64,6 +64,7 @@ import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
+import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -1191,7 +1192,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
      * @param httpstatus
      * @throws IOException
      */
-    public SolrInputDocument err(final DigestURI digestURI, final String failReason, final FailType failType, final int httpstatus) throws IOException {
+    public SolrInputDocument err(final DigestURI digestURI, String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
         final SolrInputDocument solrdoc = new SolrInputDocument();
         add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash()));
         add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true));
@@ -1212,6 +1213,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         if (contains(CollectionSchema.failreason_s)) add(solrdoc, CollectionSchema.failreason_s, failReason);
         if (contains(CollectionSchema.failtype_s)) add(solrdoc, CollectionSchema.failtype_s, failType.name());
         if (contains(CollectionSchema.httpstatus_i)) add(solrdoc, CollectionSchema.httpstatus_i, httpstatus);
+        if (contains(CollectionSchema.collection_sxt)) add(solrdoc, CollectionSchema.collection_sxt, collections);
         return solrdoc;
     }
diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java
index cd6513630..1c2580198 100644
--- a/source/net/yacy/search/snippet/MediaSnippet.java
+++ b/source/net/yacy/search/snippet/MediaSnippet.java
@@ -259,7 +259,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<Media
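With collection_sxt now stored on error documents, crawl failures can be
inspected per collection. A hypothetical SolrJ client (4.x-era API; the
endpoint, core name, collection value, and the failtype value "fail" are
assumptions, not part of this patch):

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;

    public class ErrorsByCollection {
        public static void main(final String[] args) throws SolrServerException {
            // assumed endpoint of the Solr core holding YaCy's collection index
            final HttpSolrServer solr = new HttpSolrServer("http://localhost:8090/solr/collection1");
            final SolrQuery q = new SolrQuery("collection_sxt:intranet AND failtype_s:fail");
            q.setFields("sku", "failreason_s", "httpstatus_i");
            q.setRows(50);
            final QueryResponse rsp = solr.query(q);
            for (final SolrDocument d : rsp.getResults()) {
                System.out.println(d.getFieldValue("sku") + " -> " + d.getFieldValue("failreason_s"));
            }
        }
    }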