more bugfixes to date parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6864 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent cf43bdc87e
commit f23cbd2dab

@ -96,6 +96,7 @@ public class DCEntry extends TreeMap<String, String> {
String d = this.get("docdatetime");
if (d == null) d = this.get("dc:date");
if (d == null) return null;
if (d.length() == 0) return null;
try {
return DateFormatter.parseISO8601(d);
} catch (ParseException e) {

@ -42,6 +42,7 @@ import net.yacy.kelondro.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
@ -78,6 +79,8 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
public void run() {
try {
this.saxParser.parse(this.stream, this);
} catch (SAXParseException e) {
Log.logException(e);
} catch (SAXException e) {
Log.logException(e);
} catch (IOException e) {

@ -185,12 +185,16 @@ public final class DateFormatter {
public static Date parseISO8601(String s) throws ParseException {
// do some lazy checks here
s = s.trim();
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // trailing '?' is sometimes used if the writer is not sure about the date
if (s.startsWith("{")) s = s.substring(1);
if (s.endsWith("}")) s = s.substring(0, s.length() - 1);
if (s.startsWith("[")) s = s.substring(1);
if (s.endsWith("]")) s = s.substring(0, s.length() - 1);
while (s.charAt(0) > '9' || s.charAt(0) < '0') s = s.substring(1);
while (s.length() > 0 && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1);
if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00";
int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first
p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first
while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // trailing '?' is sometimes used if the writer is not sure about the date
// now go for exact parsing
final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);

Loading…
Cancel
Save