Support scraping start/end dates from HTML tags with a "datetime" attribute

This attribute may be used in the HTML5 <time> tag (which we don't explicitly support yet for dates in content scraping).
pull/44/head
reger 9 years ago
parent 900d4584ba
commit 2048b7e057

@ -389,16 +389,17 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// itemprop
String itemprop = tag.opts.getProperty("itemprop");
if (itemprop != null) {
String content = tag.opts.getProperty("content");
if (content != null) {
String propval = tag.opts.getProperty("content");
if (propval == null) propval = tag.opts.getProperty("datetime"); // html5 example: <time itemprop="startDate" datetime="2016-01-26">today</time> while each prop is optional
if (propval != null) {
if ("startDate".equals(itemprop)) try {
// parse ISO 8601 date
Date startDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
Date startDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
this.startDates.add(startDate);
} catch (ParseException e) {}
if ("endDate".equals(itemprop)) try {
// parse ISO 8601 date
Date endDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
Date endDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
this.endDates.add(endDate);
} catch (ParseException e) {}
}

Loading…
Cancel
Save