diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 118ae1edc..0f60e5a8d 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -94,7 +94,7 @@ public class Document { private final double lon, lat; private final Object parserObject; // the source object that was used to create the Document private final Map> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document - private final Date date; + private final Date lastModified; private int crawldepth; public Document(final DigestURL location, final String mimeType, final String charset, @@ -110,7 +110,7 @@ public class Document { final LinkedHashMap rss, final LinkedHashMap images, final boolean indexingDenied, - final Date date) { + final Date lastModified) { this.source = location; this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType; this.charset = charset; @@ -146,7 +146,7 @@ public class Document { this.indexingDenied = indexingDenied; this.text = text == null ? "" : text; this.generic_facets = new HashMap>(); - this.date = date == null ? new Date() : date; + this.lastModified = lastModified == null ? new Date() : lastModified; this.crawldepth = 999; // unknown yet } @@ -476,8 +476,8 @@ dc_rights return this.emaillinks; } - public Date getDate() { - return this.date; + public Date getLastModified() { + return this.lastModified; } public double lon() { @@ -868,7 +868,7 @@ dc_rights rss.putAll(doc.getRSS()); images.putAll(doc.getImages()); if (doc.lon() != 0.0 && doc.lat() != 0.0) { lon = doc.lon(); lat = doc.lat(); } - if (doc.date.before(date)) date = doc.date; + if (doc.lastModified.before(date)) date = doc.lastModified; if (doc.getDepth() < mindepth) mindepth = doc.getDepth(); if (doc.dc_language() != null) languages.add(doc.dc_language()); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 068b60e12..a5c8d59e5 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -547,7 +547,8 @@ public class Segment { final SearchEvent searchEvent, final String sourceName, // contains the crawl profile hash if this comes from a web crawl final boolean storeToRWI, - final String proxy + final String proxy, + final String acceptLanguage ) { final long startTime = System.currentTimeMillis(); @@ -579,7 +580,7 @@ public class Segment { String ext = MultiProtocolURL.getFileExtension(url.getFile()).toLowerCase(); if (ext.length() == 0 || url.getFile().length() <= 1 || htmlParser.htmlExtensionsSet.contains(ext)) { // STORE IMAGE AND METADATA - Transactions.store(vector, crawlProfile.snapshotLoadImage(), crawlProfile.snapshotReplaceold(), proxy, crawlProfile.getAgent()); + Transactions.store(vector, crawlProfile.snapshotLoadImage(), crawlProfile.snapshotReplaceold(), proxy, crawlProfile.getAgent(), acceptLanguage); } } @@ -609,7 +610,7 @@ public class Segment { } // REMEMBER FIRST SEEN - setFirstSeenTime(url.hash(), Math.min(document.getDate().getTime(), System.currentTimeMillis())); // should exist already in the index at this time, but just to make sure + setFirstSeenTime(url.hash(), Math.min(document.getLastModified().getTime(), System.currentTimeMillis())); // should exist already in the index at this time, but just to make sure // write the edges to the citation reference index if (this.connectedCitation()) try { diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 05a1b1730..ffc1ae77e 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -490,7 +490,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri if (allAttr || contains(CollectionSchema.last_modified)) { Date lastModified = responseHeader == null ? new Date() : responseHeader.lastModified(); if (lastModified == null) lastModified = new Date(); - if (document.getDate().before(lastModified)) lastModified = document.getDate(); + if (document.getLastModified().before(lastModified)) lastModified = document.getLastModified(); long firstSeen = segment.getFirstSeenTime(digestURL.hash()); if (firstSeen > 0 && firstSeen < lastModified.getTime()) lastModified = new Date(firstSeen); // patch the date if we have seen the document earlier add(doc, CollectionSchema.last_modified, lastModified);