Adjust mergeDocument after parsing to:

- preserve charset and languages
- fix the merge of authors (author names were appended to subjects instead of authors)
pull/1/head
reger 11 years ago
parent 0d29b972cc
commit 2d67f29244

@ -817,17 +817,22 @@ dc_rights
final List<AnchorURL> anchors = new ArrayList<AnchorURL>(); final List<AnchorURL> anchors = new ArrayList<AnchorURL>();
final LinkedHashMap<DigestURL, String> rss = new LinkedHashMap<DigestURL, String>(); final LinkedHashMap<DigestURL, String> rss = new LinkedHashMap<DigestURL, String>();
final LinkedHashMap<AnchorURL, ImageEntry> images = new LinkedHashMap<AnchorURL, ImageEntry>(); final LinkedHashMap<AnchorURL, ImageEntry> images = new LinkedHashMap<AnchorURL, ImageEntry>();
final Set<String> languages = new HashSet<String>();
double lon = 0.0d, lat = 0.0d; double lon = 0.0d, lat = 0.0d;
Date date = new Date(); Date date = new Date();
String charset = null;
int mindepth = 999; int mindepth = 999;
for (final Document doc: docs) { for (final Document doc: docs) {
if (doc == null) continue; if (doc == null) continue;
if (charset == null) charset = doc.charset; // TODO: uses this charset for merged content
final String author = doc.dc_creator(); final String author = doc.dc_creator();
if (author.length() > 0) { if (author.length() > 0) {
if (authors.length() > 0) authors.append(","); if (authors.length() > 0) authors.append(",");
subjects.append(author); authors.append(author);
} }
final String publisher = doc.dc_publisher(); final String publisher = doc.dc_publisher();
@ -861,6 +866,7 @@ dc_rights
if (doc.date.before(date)) date = doc.date; if (doc.date.before(date)) date = doc.date;
if (doc.getDepth() < mindepth) mindepth = doc.getDepth(); if (doc.getDepth() < mindepth) mindepth = doc.getDepth();
if (doc.dc_language() != null) languages.add(doc.dc_language());
} }
// clean up parser data // clean up parser data
@ -878,9 +884,9 @@ dc_rights
Document newDoc = new Document( Document newDoc = new Document(
location, location,
globalMime, globalMime,
charset,
null, null,
null, languages,
null,
subjects.toString().split(" |,"), subjects.toString().split(" |,"),
titlesa, titlesa,
authors.toString(), authors.toString(),

Loading…
Cancel
Save