Replace the ISO8601 (aka W3C datetime) parser with one that supports every representation allowed by this standard; see http://www.w3.org/TR/NOTE-datetime

- especially useful for sitemap parsing, where this date format is used

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4286 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
fuchsi 17 years ago
parent 33ee6745f6
commit 1cb6e431a6

@ -58,7 +58,7 @@ public class get {
Date parsedDate = null;
try {
parsedDate = serverDate.iso8601ToDate(date);
parsedDate = serverDate.parseISO8601(date);
} catch (ParseException e) {
parsedDate = new Date();
}

@ -337,7 +337,7 @@ public class SitemapParser extends DefaultHandler {
} else if (this.currentElement.equalsIgnoreCase(SITEMAP_URL_LASTMOD)) {
String dateStr = new String(buf,offset,len);
try {
this.lastMod = serverDate.iso8601ToDate(dateStr);
this.lastMod = serverDate.parseISO8601(dateStr);
} catch (ParseException e) {
this.logger.logInfo("Unable to parse datestring '" + dateStr + "'");
}

@ -507,7 +507,7 @@ public class bookmarksDB {
Date parsedDate = null;
try {
parsedDate = serverDate.iso8601ToDate(time);
parsedDate = serverDate.parseISO8601(time);
} catch (ParseException e) {
parsedDate = new Date();
}

@ -50,6 +50,8 @@ import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.TimeZone;
import de.anomic.server.logging.serverLog;
@ -149,6 +151,125 @@ public final class serverDate {
return null;
}
/**
 * Renders a Date as an ISO8601/W3C datetime String.
 * The output is expressed in UTC/GMT, for example "2007-12-19T10:20:30Z".
 *
 * @param date the Date instance to render.
 * @return the fixed width (20 chars) ISO8601 representation of the date.
 */
public static String formatISO8601(Date date) {
    // delegate to the shared formatting helper using the ISO8601 formatter
    final String formatted = format(FORMAT_ISO8601, date);
    return formatted;
}
/**
 * Parses dates as defined in
 * <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a>.
 * This format (a profile of ISO8601) allows different "precisions".
 * The following representations are accepted:<br>
 * "2007"<br>
 * "2007-12"<br>
 * "2007-12-19"<br>
 * "2007-12-19T10:20+03:00"<br>
 * "2007-12-19T10:20:30+03:00"<br>
 * "2007-12-19T10:20:30.567+03:00"<br>
 * The timezone designator may be written as "+hh:mm"/"-hh:mm" (as required by
 * the W3C note), in the compact form "+hhmm"/"-hhmm", as hours only
 * ("+hh"), or as "Z" for UTC. The fraction of a second may have any number
 * of digits; it is evaluated with millisecond precision (".5" is 500 ms).<br>
 * Parsing is done in a fuzzy way. If there is an illegal character somewhere in
 * the String, or the String ends early, the date parsed so far will be returned,
 * e.g. the input "2007-12-19FOO" returns a date that represents "2007-12-19"
 * and "2007-12-19T10:20:30" (missing timezone) is interpreted as UTC.
 * Leading and trailing whitespace is ignored.
 *
 * @param s the String to parse, may be <code>null</code>
 * @return the parsed Date; fields that could not be parsed are left at their
 *         lowest value
 * @throws ParseException if <code>s</code> is <code>null</code>, empty or
 *         not even a year could be parsed
 */
public static Date parseISO8601(String s) throws ParseException {
    // check for null BEFORE the tokenizer is created, otherwise callers
    // that only expect a ParseException would see a NullPointerException
    if (s == null)
        throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);

    Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);
    cal.clear();

    // split 2007-12-19T10:20:30.789+05:00 into its parts
    // correct: yyyy['-'MM['-'dd['T'HH':'MM[':'ss['.'SSS]]('Z'|('+'|'-')hh':'mm)]]]
    StringTokenizer t = new StringTokenizer(s.trim(), "-T:.Z+", true);
    if (t.countTokens() == 0)
        throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);

    try {
        // Every field is written to the calendar as soon as it has been parsed,
        // so that a later error really leaves "the date parsed so far" intact.

        // year
        cal.set(Calendar.YEAR, Integer.parseInt(t.nextToken()));

        // month
        if (!t.nextToken().equals("-"))
            return cal.getTime();
        cal.set(Calendar.MONTH, Integer.parseInt(t.nextToken()) - 1);

        // day
        if (!t.nextToken().equals("-"))
            return cal.getTime();
        cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(t.nextToken()));

        // The standard says: if there is an hour there has to be a minute and a
        // timezone token, too. We are tolerant and keep whatever is available.
        // hour
        if (!t.nextToken().equals("T"))
            return cal.getTime();
        cal.set(Calendar.HOUR_OF_DAY, Integer.parseInt(t.nextToken()));

        // minute
        if (!t.nextToken().equals(":"))
            return cal.getTime();
        cal.set(Calendar.MINUTE, Integer.parseInt(t.nextToken()));

        // need TZ or seconds
        String token = t.nextToken();
        if (token.equals(":")) {
            cal.set(Calendar.SECOND, Integer.parseInt(t.nextToken()));
            // need fraction of a second or TZ
            token = t.nextToken();
            if (token.equals(".")) {
                cal.set(Calendar.MILLISECOND, parseISO8601FractionMillis(t.nextToken()));
                // need TZ
                token = t.nextToken();
            }
        }

        // check for TZ data
        if (token.equals("Z")) {
            // UTC, the calendar already uses a zero offset
            return cal.getTime();
        }
        final int sign;
        if (token.equals("+")) {
            sign = 1;
        } else if (token.equals("-")) {
            sign = -1;
        } else {
            // no legal TZ offset found
            return cal.getTime();
        }

        final int millisPerMinute = 60 * 1000;
        final String offsetToken = t.nextToken();
        int offsetMinutes;
        if (offsetToken.length() > 2) {
            // compact form "hhmm"
            final int hhmm = Integer.parseInt(offsetToken);
            offsetMinutes = (hhmm / 100) * 60 + (hhmm % 100);
            cal.set(Calendar.ZONE_OFFSET, sign * offsetMinutes * millisPerMinute);
        } else {
            // W3C form "hh:mm" (the ":mm" part is treated as optional)
            offsetMinutes = Integer.parseInt(offsetToken) * 60;
            cal.set(Calendar.ZONE_OFFSET, sign * offsetMinutes * millisPerMinute);
            if (t.hasMoreTokens() && t.nextToken().equals(":")) {
                offsetMinutes += Integer.parseInt(t.nextToken());
                cal.set(Calendar.ZONE_OFFSET, sign * offsetMinutes * millisPerMinute);
            }
        }
    } catch (NoSuchElementException e) {
        // ignore this as it is perfectly fine to have non-complete date in this format
    } catch (Exception e) {
        // catch all Exceptions and return what we parsed so far
        serverLog.logInfo("SERVER", "parseISO8601: DATE ERROR with: '" + s + "' got so far: '" + cal.toString());
    }

    // in case we couldn't even parse a year
    if (!cal.isSet(Calendar.YEAR))
        throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
    return cal.getTime();
}

/**
 * Converts the digits following the decimal point of the seconds field into
 * milliseconds, e.g. "5" is 500, "56" is 560 and "5678" is 567.
 *
 * @param fraction the digits of the fraction of a second
 * @return the fraction in milliseconds, truncated to millisecond precision
 * @throws NumberFormatException if the String is empty or contains a non-digit
 */
private static int parseISO8601FractionMillis(String fraction) {
    if (fraction.length() == 0)
        throw new NumberFormatException("For input string: \"" + fraction + "\"");
    int millis = 0;
    // weight of the current digit: 100 ms, 10 ms, 1 ms, then nothing
    int weight = 100;
    for (int i = 0; i < fraction.length(); i++) {
        final int digit = Character.digit(fraction.charAt(i), 10);
        if (digit < 0)
            throw new NumberFormatException("For input string: \"" + fraction + "\"");
        millis += digit * weight;
        weight /= 10;
    }
    return millis;
}
/**
* Note: The short day format doesn't include any timezone information. This method
* transforms the date into the GMT/UTC timezone. Example: If the local system time is,
@ -462,43 +583,6 @@ public final class serverDate {
return new String(result);
}
/**
 * Converts a String of the form "yyyy-MM-dd'T'HH:mm:ss" into a Date.
 * The String is split at the letter "T"; if this does not yield exactly a
 * day part and a time part, the current date is returned instead.
 * Only the first 8 characters of the time part are evaluated, anything
 * following them (e.g. a timezone designator) is dropped. Both parts are
 * interpreted with the default settings of SimpleDateFormat and Calendar.
 *
 * @param iso8601 the String to convert
 * @return the parsed Date, or the current date if the String does not
 *         consist of exactly two parts separated by "T"
 * @throws ParseException if the day part or the time part cannot be parsed
 */
public static Date iso8601ToDate(String iso8601) throws ParseException {
    final String[] parts = iso8601.split("T");
    if (parts.length != 2) {
        // not exactly one day part and one time part: fall back to "now"
        return new Date();
    }

    final String dayPart = parts[0];
    // only the leading "HH:mm:ss" of the time part is of interest
    final String timePart = (parts[1].length() > 8) ? parts[1].substring(0, 8) : parts[1];

    final Calendar result = Calendar.getInstance();
    final Calendar timeOfDay = Calendar.getInstance();
    result.setTime(new SimpleDateFormat("yyyy-MM-dd").parse(dayPart));
    timeOfDay.setTime(new SimpleDateFormat("HH:mm:ss").parse(timePart));

    // transfer the time of day onto the parsed day
    final int[] copiedFields = { Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND };
    for (int i = 0; i < copiedFields.length; i++) {
        result.set(copiedFields[i], timeOfDay.get(copiedFields[i]));
    }
    return result.getTime();
}
/**
 * Renders a Date as an ISO8601 String.
 * The output is expressed in UTC/GMT, for example "2007-12-19T10:20:30Z".
 * Access to the shared formatter is serialized by synchronizing on it.
 * @param date the Date instance to render.
 * @return the fixed width (20 chars) ISO8601 representation of the date.
 */
public static String formatISO8601(Date date) {
    final String formatted;
    // hold the formatter's monitor while it is in use
    synchronized (FORMAT_ISO8601) {
        formatted = FORMAT_ISO8601.format(date);
    }
    return formatted;
}
public static String intervalToString(long millis) {
try {
long mins = millis / 60000;

Loading…
Cancel
Save