Harmonize the last-modified date used for the RWI and fulltext indexes in storeDocument

pull/93/head
reger 8 years ago
parent da0f4ee599
commit 8fe28a83f2

@ -95,7 +95,7 @@ public class Document {
private final double lon, lat; private final double lon, lat;
private final Parser parserObject; // the source object that was used to create the Document private final Parser parserObject; // the source object that was used to create the Document
private final Map<String, Set<String>> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document private final Map<String, Set<String>> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document
private final Date lastModified; private final Date lastModified; // creation or last modification date of the source document
private int crawldepth; private int crawldepth;
public Document(final DigestURL location, final String mimeType, final String charset, public Document(final DigestURL location, final String mimeType, final String charset,
@ -513,6 +513,9 @@ dc_rights
return this.emaillinks; return this.emaillinks;
} }
/**
* @return last modification date of the source document
*/
public Date getLastModified() { public Date getLastModified() {
return this.lastModified; return this.lastModified;
} }

@ -582,12 +582,14 @@ public class Segment {
final String urlNormalform = url.toNormalform(true); final String urlNormalform = url.toNormalform(true);
final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language
// STORE URL TO LOADED-URL-DB // get last modified date of the document to be used for the rwi index
Date modDate = responseHeader == null ? new Date() : responseHeader.lastModified(); // (the lastmodified document property should be the same in rwi and fulltext (calculated in yacy2solr))
Date modDate = responseHeader == null ? document.getLastModified() : responseHeader.lastModified();
if (modDate == null) modDate = new Date(); if (modDate == null) modDate = new Date();
if (document.getLastModified().before(modDate)) modDate = document.getLastModified();
if (modDate.getTime() > loadDate.getTime()) modDate = loadDate; if (modDate.getTime() > loadDate.getTime()) modDate = loadDate;
char docType = Response.docType(document.dc_format()); char docType = Response.docType(document.dc_format());
// CREATE SOLR DOCUMENT // CREATE SOLR DOCUMENT
final CollectionConfiguration collectionConfig = this.fulltext.getDefaultConfiguration(); final CollectionConfiguration collectionConfig = this.fulltext.getDefaultConfiguration();
final CollectionConfiguration.SolrVector vector = collectionConfig.yacy2solr(this, collections, responseHeader, document, condenser, referrerURL, language, crawlProfile.isPushCrawlProfile(), this.fulltext().useWebgraph() ? this.fulltext.getWebgraphConfiguration() : null, sourceName); final CollectionConfiguration.SolrVector vector = collectionConfig.yacy2solr(this, collections, responseHeader, document, condenser, referrerURL, language, crawlProfile.isPushCrawlProfile(), this.fulltext().useWebgraph() ? this.fulltext.getWebgraphConfiguration() : null, sourceName);

Loading…
Cancel
Save