From f23cbd2dab28ffde99d01e8fc8113fbd9ce2a949 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 11 May 2010 11:32:46 +0000 Subject: [PATCH] more bugfixes to date parser git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6864 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/net/yacy/document/content/DCEntry.java | 1 + source/net/yacy/document/content/SurrogateReader.java | 3 +++ source/net/yacy/kelondro/util/DateFormatter.java | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/document/content/DCEntry.java b/source/net/yacy/document/content/DCEntry.java index 372e94f38..d1a4565b2 100644 --- a/source/net/yacy/document/content/DCEntry.java +++ b/source/net/yacy/document/content/DCEntry.java @@ -96,6 +96,7 @@ public class DCEntry extends TreeMap { String d = this.get("docdatetime"); if (d == null) d = this.get("dc:date"); if (d == null) return null; + if (d.length() == 0) return null; try { return DateFormatter.parseISO8601(d); } catch (ParseException e) { diff --git a/source/net/yacy/document/content/SurrogateReader.java b/source/net/yacy/document/content/SurrogateReader.java index 6a2e3da97..6578eeea9 100644 --- a/source/net/yacy/document/content/SurrogateReader.java +++ b/source/net/yacy/document/content/SurrogateReader.java @@ -42,6 +42,7 @@ import net.yacy.kelondro.logging.Log; import org.xml.sax.Attributes; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; import org.xml.sax.helpers.DefaultHandler; @@ -78,6 +79,8 @@ public class SurrogateReader extends DefaultHandler implements Runnable { public void run() { try { this.saxParser.parse(this.stream, this); + } catch (SAXParseException e) { + Log.logException(e); } catch (SAXException e) { Log.logException(e); } catch (IOException e) { diff --git a/source/net/yacy/kelondro/util/DateFormatter.java b/source/net/yacy/kelondro/util/DateFormatter.java index 595de7caf..609c9a72b 100644 --- a/source/net/yacy/kelondro/util/DateFormatter.java +++ b/source/net/yacy/kelondro/util/DateFormatter.java @@ -185,12 +185,16 @@ public final class DateFormatter { public static Date parseISO8601(String s) throws ParseException { // do some lazy checks here s = s.trim(); + while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date if (s.startsWith("{")) s = s.substring(1); if (s.endsWith("}")) s = s.substring(0, s.length() - 1); if (s.startsWith("[")) s = s.substring(1); if (s.endsWith("]")) s = s.substring(0, s.length() - 1); - while (s.charAt(0) > '9' || s.charAt(0) < '0') s = s.substring(1); + while (s.length() > 0 && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1); if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00"; + int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first + p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first + while (s.length() > 0 && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date // no go for exact parsing final Calendar cal = Calendar.getInstance(TZ_GMT, Locale.US);