Harmonize the last-modified date used for rwi and fulltext in storeDocument

pull/93/head
reger 8 years ago
parent da0f4ee599
commit 8fe28a83f2

@ -95,7 +95,7 @@ public class Document {
private final double lon, lat;
private final Parser parserObject; // the source object that was used to create the Document
private final Map<String, Set<String>> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document
private final Date lastModified;
private final Date lastModified; // creation or last modification date of the source document
private int crawldepth;
public Document(final DigestURL location, final String mimeType, final String charset,
@ -513,6 +513,9 @@ dc_rights
return this.emaillinks;
}
/**
 * Returns the date held in this document's {@code lastModified} field.
 * The stored instance itself is handed out, not a copy.
 *
 * @return last modification date of the source document
 */
public Date getLastModified() {
    return lastModified;
}

@ -582,12 +582,14 @@ public class Segment {
final String urlNormalform = url.toNormalform(true);
final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language
// STORE URL TO LOADED-URL-DB
Date modDate = responseHeader == null ? new Date() : responseHeader.lastModified();
// get last modified date of the document to be used for the rwi index
// (the document's lastmodified property should be the same in rwi and fulltext; see the calculation in yacy2solr)
Date modDate = responseHeader == null ? document.getLastModified() : responseHeader.lastModified();
if (modDate == null) modDate = new Date();
if (document.getLastModified().before(modDate)) modDate = document.getLastModified();
if (modDate.getTime() > loadDate.getTime()) modDate = loadDate;
char docType = Response.docType(document.dc_format());
// CREATE SOLR DOCUMENT
final CollectionConfiguration collectionConfig = this.fulltext.getDefaultConfiguration();
final CollectionConfiguration.SolrVector vector = collectionConfig.yacy2solr(this, collections, responseHeader, document, condenser, referrerURL, language, crawlProfile.isPushCrawlProfile(), this.fulltext().useWebgraph() ? this.fulltext.getWebgraphConfiguration() : null, sourceName);

Loading…
Cancel
Save