Do not push noindex errors into the log when in intranet mode.

Noindex attributes are attached to artificially constructed index.html
files which list directories. Such files are naturally rejected by the
crawler and should not appear in the error log, because they are part of
how file crawlers are constructed and would confuse users who see them in
the error log.
pull/1/head
orbiter 10 years ago
parent 688c6d8954
commit a65df4ce7e

@@ -2692,7 +2692,7 @@ public final class Switchboard extends serverSwitch {
// check which files may take part in the indexing process
final List<Document> doclist = new ArrayList<Document>();
docloop: for (final Document document : in.documents) {
if (document.indexingDenied() && profile.obeyHtmlRobotsNoindex()) {
if (document.indexingDenied() && profile.obeyHtmlRobotsNoindex() && !this.isIntranetMode()) {
if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule");
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(in.queueEntry.url(), in.queueEntry.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by document-attached noindexing rule", -1);

Loading…
Cancel
Save