From 89c0aa0e74b4ce4b17c96b37ad29d37e24cb3695 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Wed, 17 Jul 2013 15:20:56 +0200
Subject: [PATCH] added collection_sxt to error documents

---
 htroot/Crawler_p.java                         |  1 +
 htroot/yacy/crawlReceipt.java                 |  1 +
 htroot/yacy/urls.java                         |  1 +
 source/net/yacy/crawler/CrawlStacker.java     |  3 +-
 source/net/yacy/crawler/data/CrawlQueues.java |  8 +++--
 source/net/yacy/crawler/data/ZURL.java        |  3 +-
 .../net/yacy/crawler/retrieval/FTPLoader.java |  4 +--
 .../yacy/crawler/retrieval/HTTPLoader.java    | 29 +++++++++----------
 .../net/yacy/repository/LoaderDispatcher.java |  8 ++---
 source/net/yacy/search/Switchboard.java       | 11 ++++++-
 .../schema/CollectionConfiguration.java       |  4 ++-
 .../net/yacy/search/snippet/MediaSnippet.java |  2 +-
 12 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 97db138a9..b0733b786 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -444,6 +444,7 @@ public class Crawler_p {
                                 0,
                                 0,
                                 0),
+                            null,
                             sb.peers.mySeed().hash.getBytes(),
                             new Date(),
                             1,
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 2b1fcbf25..d83aa38d6 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -163,6 +163,7 @@ public final class crawlReceipt {
         sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
         sb.crawlQueues.errorURL.push(
                 entry.toBalancerEntry(iam),
+                null,
                 youare.getBytes(),
                 null,
                 0,
diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java
index 6a9c6667f..3fa8cdd00 100644
--- a/htroot/yacy/urls.java
+++ b/htroot/yacy/urls.java
@@ -82,6 +82,7 @@ public class urls {
                     // place url to notice-url db
                     sb.crawlQueues.delegatedURL.push(
                         entry,
+                        null,
                         sb.peers.mySeed().hash.getBytes(),
                         new Date(),
                         0,
diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java
index f785524f6..cf61e928c 100644
--- a/source/net/yacy/crawler/CrawlStacker.java
+++ b/source/net/yacy/crawler/CrawlStacker.java
@@ -149,7 +149,8 @@ public final class CrawlStacker {
 
                 // if the url was rejected we store it into the error URL db
                 if (rejectReason != null) {
-                    this.nextQueue.errorURL.push(entry, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
+                    final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
+                    this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
                 }
             } catch (final Exception e) {
                 CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java
index 4ca3ed039..5c276a89a 100644
--- a/source/net/yacy/crawler/data/CrawlQueues.java
+++ b/source/net/yacy/crawler/data/CrawlQueues.java
@@ -612,6 +612,7 @@ public class CrawlQueues {
         private Request request;
         private final Integer code;
         private final long start;
+        private final CrawlProfile profile;
 
         private Loader(final Request entry) {
             this.start = System.currentTimeMillis();
@@ -619,6 +620,7 @@ public class CrawlQueues {
             this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
             this.code = Integer.valueOf(entry.hashCode());
             this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
+            this.profile = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
         }
 
         private long age() {
@@ -637,6 +639,7 @@ public class CrawlQueues {
                     //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,
@@ -652,8 +655,7 @@ public class CrawlQueues {
                 // returns null if everything went fine, a fail reason string if a problem occurred
                 try {
                     this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
-                    final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
-                    final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
+                    final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     if (response == null) {
                         this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                         if (CrawlQueues.this.log.isFine()) {
@@ -677,6 +679,7 @@ public class CrawlQueues {
                 if (result != null) {
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,
@@ -690,6 +693,7 @@ public class CrawlQueues {
             } catch (final Exception e) {
                 CrawlQueues.this.errorURL.push(
                         this.request,
+                        profile,
                         ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                         new Date(),
                         1,
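The null-guard above recurs at every call site this patch touches: a request's
profile handle may be null, and getActive() may return null for a handle that
has already left the active set. A minimal sketch of the shared pattern (the
helper name resolveProfile is invented; the types and the getActive() lookup
are taken from the hunks above):

    // Sketch only, not part of the patch: callers must tolerate a null
    // CrawlProfile and fall back to defaults; the error document then
    // simply carries no collection_sxt values.
    private CrawlProfile resolveProfile(final Request request) {
        final String handle = request.profileHandle();
        return handle == null ? null : this.sb.crawler.getActive(UTF8.getBytes(handle));
    }
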
diff --git a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java
index 9740c4525..8a182955e 100644
--- a/source/net/yacy/crawler/data/ZURL.java
+++ b/source/net/yacy/crawler/data/ZURL.java
@@ -172,6 +172,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
 
     public void push(
             final Request bentry,
+            final CrawlProfile profile,
             final byte[] executor,
             final Date workdate,
             final int workcount,
@@ -190,7 +191,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
         if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
             // send the error to solr
             try {
-                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
+                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
                 this.fulltext.getDefaultConnector().add(errorDoc);
             } catch (final IOException e) {
                 ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());
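For orientation, the error document that err() builds now has roughly this
shape (field names are the CollectionSchema members used in the err() hunk
below; the URL, hash, and collection values are invented for the example):

    // Illustrative only: approximate error document after this patch.
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "AAAAAAAAAAAA");                     // hash of the failed URL
    doc.addField("sku", "http://example.com/blocked.html"); // normalized URL
    doc.addField("failreason_s", "FINAL_LOAD_CONTEXT url in blacklist");
    doc.addField("failtype_s", "fail");                     // FailType.name()
    doc.addField("httpstatus_i", -1);
    doc.addField("collection_sxt", Arrays.asList("user", "intranet")); // new in this patch
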
diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java
index 8d2523b97..080117d01 100644
--- a/source/net/yacy/crawler/retrieval/FTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/FTPLoader.java
@@ -101,6 +101,7 @@ public class FTPLoader {
         // create new ftp client
         final FTPClient ftpClient = new FTPClient();
+        final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
 
         // get a connection
         if (openConnection(ftpClient, entryUrl)) {
             // test if the specified file is a directory
@@ -130,7 +131,6 @@ public class FTPLoader {
                     final ResponseHeader responseHeader = new ResponseHeader(200);
                     responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
                     responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
-                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
                     response = new Response(
                             request,
                             requestHeader,
@@ -156,7 +156,7 @@ public class FTPLoader {
         if (berr.size() > 0 || response == null) {
             // some error logging
             final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
-            this.sb.crawlQueues.errorURL.push(request, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
             throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
         }
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index eed260701..0dbe94568 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -70,20 +70,20 @@ public final class HTTPLoader {
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000);
     }
 
-    public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    public Response load(final Request entry, CrawlProfile profile, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
         Latency.updateBeforeLoad(entry.url());
         final long start = System.currentTimeMillis();
-        final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
+        final Response doc = load(entry, profile, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
         Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
         return doc;
     }
 
-    private Response load(final Request request, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
 
         byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash);
 
         if (retryCount < 0) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
             throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 
@@ -99,7 +99,7 @@ public final class HTTPLoader {
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
@@ -146,7 +146,7 @@ public final class HTTPLoader {
             redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
 
             if (redirectionUrlString.isEmpty()) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
                 throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             }
 
@@ -160,32 +160,32 @@ public final class HTTPLoader {
             this.sb.webStructure.generateCitationReference(url, redirectionUrl);
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
             }
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
                 // if we are already doing a shutdown we don't need to retry crawling
                 if (Thread.currentThread().isInterrupted()) {
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
                     throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                 }
 
                 // check if the url was already loaded
                 if (Cache.has(redirectionUrl.hash())) { // customer request
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
                     throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
                 }
 
                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
-                return load(request, retryCount - 1, maxFileSize, blacklistType, timeout);
+                return load(request, profile, retryCount - 1, maxFileSize, blacklistType, timeout);
             }
             // we don't want to follow redirects
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
             throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (responseBody == null) {
             // no response, reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
             throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (statusCode == 200 || statusCode == 203) {
             // the transfer is ok
@@ -196,12 +196,11 @@ public final class HTTPLoader {
 
             // check length again in case it was not possible to get the length before loading
             if (maxFileSize >= 0 && contentLength > maxFileSize) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
                 throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
             }
 
             // create a new cache entry
-            final CrawlProfile profile = request.profileHandle() == null ? null : this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
             response = new Response(
                     request,
                     requestHeader,
@@ -214,7 +213,7 @@ public final class HTTPLoader {
             return response;
         } else {
             // if the response has not the right response type then reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
             throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         }
     }
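A note on the design: HTTPLoader receives the CrawlProfile as a parameter
instead of resolving it itself (as FTPLoader does) because its private load()
recurses once per redirect hop; threading the profile through the recursion
means every failure branch, including the terminal "retry counter exceeded"
error, can tag its error document, while the handle is resolved only once by
the caller. A toy model of that pattern (self-contained, not YaCy code):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    public final class RedirectDemo {
        private static final Map<String, String> REDIRECTS = new HashMap<String, String>();
        static { REDIRECTS.put("/a", "/b"); REDIRECTS.put("/b", "/c"); }

        // The context object (here a plain String standing in for CrawlProfile)
        // rides along so the terminal failure still knows where it came from.
        static String fetch(final String url, final String profile, final int retriesLeft) throws IOException {
            if (retriesLeft < 0) throw new IOException("retry counter exceeded for " + url + " (profile: " + profile + ")");
            final String target = REDIRECTS.get(url);
            return target == null ? "content of " + url : fetch(target, profile, retriesLeft - 1);
        }

        public static void main(final String[] args) throws IOException {
            System.out.println(fetch("/a", "intranet", 2)); // follows /a -> /b -> /c
        }
    }
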
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 603cff76c..c8c3d0037 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -186,15 +186,15 @@ public final class LoaderDispatcher {
         if (url.isFile() || url.isSMB()) cacheStrategy = CacheStrategy.NOCACHE; // load just from the file system
         final String protocol = url.getProtocol();
         final String host = url.getHost();
-
+        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
+
         // check if url is in blacklist
         if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
-            this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, crawlProfile, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
         // check if we have the page in the cache
-        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
         if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) {
             // we have passed a first test if caching is allowed
             // now see if there is a cache entry
@@ -280,7 +280,7 @@ public final class LoaderDispatcher {
         // load resource from the internet
         Response response = null;
         if (protocol.equals("http") || protocol.equals("https")) {
-            response = this.httpLoader.load(request, maxFileSize, blacklistType, timeout);
+            response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, timeout);
         } else if (protocol.equals("ftp")) {
             response = this.ftpLoader.load(request, true);
         } else if (protocol.equals("smb")) {
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 5900c4357..a88706604 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -1803,6 +1803,7 @@ public final class Switchboard extends serverSwitch {
             //if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 (referrerURL == null) ? null : referrerURL.hash(),
                 response.initiator(),
                 response.name(),
@@ -2474,6 +2475,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("the resource '" + response.url() + "' is missing in the cache.");
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 response.referrerHash(),
                 response.initiator(),
                 response.name(),
@@ -2498,6 +2500,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
             addURLtoErrorDB(
                 response.url(),
+                response.profile(),
                 response.referrerHash(),
                 response.initiator(),
                 response.name(),
@@ -2597,6 +2600,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2612,6 +2616,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule");
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2624,6 +2629,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                 in.queueEntry.url(),
+                profile,
                 in.queueEntry.referrerHash(),
                 in.queueEntry.initiator(),
                 in.queueEntry.name(),
@@ -2707,6 +2713,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
             addURLtoErrorDB(
                 url,
+                profile,
                 (referrerURL == null) ? null : referrerURL.hash(),
                 queueEntry.initiator(),
                 dc_title,
@@ -2719,6 +2726,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
             addURLtoErrorDB(
                 url,
+                profile,
                 (referrerURL == null) ? null : referrerURL.hash(),
                 queueEntry.initiator(),
                 dc_title,
@@ -3361,6 +3369,7 @@ public final class Switchboard extends serverSwitch {
 
     private void addURLtoErrorDB(
         final DigestURI url,
+        final CrawlProfile profile,
         final byte[] referrerHash,
         final byte[] initiator,
         final String name,
@@ -3380,7 +3389,7 @@ public final class Switchboard extends serverSwitch {
                 0,
                 0,
                 0);
-        this.crawlQueues.errorURL.push(bentry, initiator, new Date(), 0, failCategory, failreason, -1);
+        this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
     }
 
     public final void heuristicSite(final SearchEvent searchEvent, final String host) {
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index c6dab8b62..afd57ac64 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -64,6 +64,7 @@ import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
+import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -1191,7 +1192,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
      * @param httpstatus
      * @throws IOException
      */
-    public SolrInputDocument err(final DigestURI digestURI, final String failReason, final FailType failType, final int httpstatus) throws IOException {
+    public SolrInputDocument err(final DigestURI digestURI, String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
         final SolrInputDocument solrdoc = new SolrInputDocument();
         add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash()));
         add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true));
@@ -1212,6 +1213,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         if (contains(CollectionSchema.failreason_s)) add(solrdoc, CollectionSchema.failreason_s, failReason);
         if (contains(CollectionSchema.failtype_s)) add(solrdoc, CollectionSchema.failtype_s, failType.name());
         if (contains(CollectionSchema.httpstatus_i)) add(solrdoc, CollectionSchema.httpstatus_i, httpstatus);
+        if (contains(CollectionSchema.collection_sxt)) add(solrdoc, CollectionSchema.collection_sxt, collections);
         return solrdoc;
     }
diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java
index cd6513630..1c2580198 100644
--- a/source/net/yacy/search/snippet/MediaSnippet.java
+++ b/source/net/yacy/search/snippet/MediaSnippet.java
@@ -259,7 +259,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<Media
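With collection_sxt now stored on error documents, crawl failures can be
inspected per collection. A hypothetical SolrJ client (4.x-era API; the
endpoint, core name, collection value, and the failtype value "fail" are
assumptions, not part of this patch):

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;

    public class ErrorsByCollection {
        public static void main(final String[] args) throws SolrServerException {
            // assumed endpoint of the Solr core holding YaCy's collection index
            final HttpSolrServer solr = new HttpSolrServer("http://localhost:8090/solr/collection1");
            final SolrQuery q = new SolrQuery("collection_sxt:intranet AND failtype_s:fail");
            q.setFields("sku", "failreason_s", "httpstatus_i");
            q.setRows(50);
            final QueryResponse rsp = solr.query(q);
            for (final SolrDocument d : rsp.getResults()) {
                System.out.println(d.getFieldValue("sku") + " -> " + d.getFieldValue("failreason_s"));
            }
        }
    }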