From 7639ec2f38a493d00f0725718c8a57bcd73e46b7 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 29 May 2009 15:09:37 +0000 Subject: [PATCH] - fixed letter case bug for dc record creation - dc parser is now lazy against letter cases git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5998 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/content/DCEntry.java | 41 +++++++++++++-------- source/de/anomic/content/dao/PhpBB3Dao.java | 17 +++------ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/source/de/anomic/content/DCEntry.java b/source/de/anomic/content/DCEntry.java index f40065a91..0b1679dd5 100644 --- a/source/de/anomic/content/DCEntry.java +++ b/source/de/anomic/content/DCEntry.java @@ -29,22 +29,31 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.text.Collator; import java.text.ParseException; import java.util.Date; -import java.util.HashMap; import java.util.HashSet; +import java.util.Locale; +import java.util.TreeMap; import de.anomic.kelondro.util.DateFormatter; import de.anomic.plasma.plasmaParserDocument; import de.anomic.yacy.yacyURL; -public class DCEntry extends HashMap { +public class DCEntry extends TreeMap { private static final long serialVersionUID = -2050291583515701559L; public static final DCEntry poison = new DCEntry(); + // use a collator to relax when distinguishing between lowercase and uppercase letters + private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); + static { + insensitiveCollator.setStrength(Collator.SECONDARY); + insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION); + } + public DCEntry() { - super(); + super((Collator) insensitiveCollator.clone()); } public DCEntry( @@ -54,12 +63,12 @@ public class DCEntry extends HashMap { String author, String body ) { - super(); - this.put("url", url.toNormalform(true, false)); - this.put("dc:Date", 
DateFormatter.formatISO8601(date)); - this.put("dc:Title", title); - this.put("dc:Creator", author); - this.put("dc:Description", body); + super((Collator) insensitiveCollator.clone()); + this.put("dc:identifier", url.toNormalform(true, false)); + this.put("dc:date", DateFormatter.formatISO8601(date)); + this.put("dc:title", title); + this.put("dc:creator", author); + this.put("dc:description", body); } /* @@ -84,7 +93,7 @@ public class DCEntry extends HashMap { public Date date() { String d = this.get("docdatetime"); - if (d == null) d = this.get("dc:Date"); + if (d == null) d = this.get("dc:date"); if (d == null) return null; try { return DateFormatter.parseISO8601(d); @@ -96,7 +105,7 @@ public class DCEntry extends HashMap { public yacyURL url() { String u = this.get("url"); - if (u == null) u = this.get("dc:Identifier"); + if (u == null) u = this.get("dc:identifier"); if (u == null) return null; try { return new yacyURL(u, null); @@ -108,13 +117,13 @@ public class DCEntry extends HashMap { public String language() { String l = this.get("language"); - if (l == null) l = this.get("dc:Language"); + if (l == null) l = this.get("dc:language"); if (l == null) return url().language(); else return l; } public String title() { String t = this.get("title"); - if (t == null) t = this.get("dc:Title"); + if (t == null) t = this.get("dc:title"); t = stripCDATA(t); if (t == null) return ""; return t; @@ -122,7 +131,7 @@ public class DCEntry extends HashMap { public String author() { String t = this.get("author"); - if (t == null) t = this.get("dc:Creator"); + if (t == null) t = this.get("dc:creator"); t = stripCDATA(t); if (t == null) return ""; return t; @@ -130,7 +139,7 @@ public class DCEntry extends HashMap { public String body() { String t = this.get("body"); - if (t == null) t = this.get("dc:Description"); + if (t == null) t = this.get("dc:description"); t = stripCDATA(t); if (t == null) return ""; return t; @@ -138,7 +147,7 @@ public class DCEntry extends HashMap { 
public String[] categories() { String t = this.get("categories"); - if (t == null) this.get("dc:Subject"); + if (t == null) this.get("dc:subject"); t = stripCDATA(t); if (t == null) return new String[]{}; return t.split(";"); diff --git a/source/de/anomic/content/dao/PhpBB3Dao.java b/source/de/anomic/content/dao/PhpBB3Dao.java index 593a2798e..6ecbe0d26 100644 --- a/source/de/anomic/content/dao/PhpBB3Dao.java +++ b/source/de/anomic/content/dao/PhpBB3Dao.java @@ -104,13 +104,11 @@ public class PhpBB3Dao implements Dao { } public Date first() { - StringBuilder sql = new StringBuilder(256); - sql.append("select min(post_time) from " + prefix + "posts"); Statement stmt = null; ResultSet rs = null; try { stmt = conn.createStatement(); - rs = stmt.executeQuery(sql.toString()); + rs = stmt.executeQuery("select min(post_time) from " + prefix + "posts"); if (rs.next()) { return new Date(rs.getLong(1) * 1000L); } @@ -125,13 +123,11 @@ public class PhpBB3Dao implements Dao { } public Date latest() { - StringBuilder sql = new StringBuilder(256); - sql.append("select max(post_time) from " + prefix + "posts"); Statement stmt = null; ResultSet rs = null; try { stmt = conn.createStatement(); - rs = stmt.executeQuery(sql.toString()); + rs = stmt.executeQuery("select max(post_time) from " + prefix + "posts"); if (rs.next()) { return new Date(rs.getLong(1) * 1000L); } @@ -165,10 +161,7 @@ public class PhpBB3Dao implements Dao { } public DCEntry get(int item) { - StringBuilder sql = new StringBuilder(256); - sql.append("select * from " + prefix + "posts where post_id = "); - sql.append(item); - return getOne(sql); + return getOne("select * from " + prefix + "posts where post_id = " + item); } public BlockingQueue query(int from, int until, int queueSize) { @@ -198,12 +191,12 @@ public class PhpBB3Dao implements Dao { } - private DCEntry getOne(StringBuilder sql) { + private DCEntry getOne(String sql) { Statement stmt = null; ResultSet rs = null; try { stmt = conn.createStatement(); - 
rs = stmt.executeQuery(sql.toString()); + rs = stmt.executeQuery(sql); if (rs.next()) { try { return parseResultSet(rs);