- fixed letter case bug for dc record creation

- dc parser is now lenient about letter case (keys are matched case-insensitively)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5998 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 34af8b4877
commit 7639ec2f38

@ -29,22 +29,31 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.Collator;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.TreeMap;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.yacy.yacyURL;
public class DCEntry extends HashMap<String, String> {
public class DCEntry extends TreeMap<String, String> {
private static final long serialVersionUID = -2050291583515701559L;
// Collator used as the key comparator of every DCEntry; it is relaxed so that
// lowercase and uppercase letters are not distinguished when comparing keys.
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static {
    insensitiveCollator.setStrength(Collator.SECONDARY);
    insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
}

// Must stay below the collator set-up: static initializers run in textual order and
// the no-arg constructor clones insensitiveCollator, so creating this instance any
// earlier would hit a still-null collator and fail class initialization with a
// NullPointerException.
public static final DCEntry poison = new DCEntry();
/**
 * Creates an empty entry.
 * The underlying sorted map is given a clone of the shared case-relaxed collator as
 * its key comparator, so keys are compared with that collator instead of by exact
 * string equality.
 */
public DCEntry() {
    // exactly one superclass constructor call is allowed and it must come first;
    // the plain super() call was replaced by the comparator-taking variant
    super((Collator) insensitiveCollator.clone());
}
public DCEntry(
@ -54,12 +63,12 @@ public class DCEntry extends HashMap<String, String> {
String author,
String body
) {
super();
this.put("url", url.toNormalform(true, false));
this.put("dc:Date", DateFormatter.formatISO8601(date));
this.put("dc:Title", title);
this.put("dc:Creator", author);
this.put("dc:Description", body);
super((Collator) insensitiveCollator.clone());
this.put("dc:identifier", url.toNormalform(true, false));
this.put("dc:date", DateFormatter.formatISO8601(date));
this.put("dc:title", title);
this.put("dc:creator", author);
this.put("dc:description", body);
}
/*
@ -84,7 +93,7 @@ public class DCEntry extends HashMap<String, String> {
public Date date() {
String d = this.get("docdatetime");
if (d == null) d = this.get("dc:Date");
if (d == null) d = this.get("dc:date");
if (d == null) return null;
try {
return DateFormatter.parseISO8601(d);
@ -96,7 +105,7 @@ public class DCEntry extends HashMap<String, String> {
public yacyURL url() {
String u = this.get("url");
if (u == null) u = this.get("dc:Identifier");
if (u == null) u = this.get("dc:identifier");
if (u == null) return null;
try {
return new yacyURL(u, null);
@ -108,13 +117,13 @@ public class DCEntry extends HashMap<String, String> {
/**
 * Determines the language of this entry.
 * The keys "language", "dc:Language" and "dc:language" are tried in that order and
 * the first stored value is returned; if none of them is stored, the language
 * reported by url() is returned instead.
 * NOTE(review): url() returns null when no URL is stored, in which case the final
 * call throws a NullPointerException - confirm that callers always provide a URL.
 *
 * @return the stored language value, or the language derived from the entry's URL
 */
public String language() {
    String lang = this.get("language");
    if (lang != null) {
        return lang;
    }
    lang = this.get("dc:Language");
    if (lang != null) {
        return lang;
    }
    lang = this.get("dc:language");
    if (lang != null) {
        return lang;
    }
    // nothing stored under any of the keys: fall back to the URL's language
    return url().language();
}
public String title() {
String t = this.get("title");
if (t == null) t = this.get("dc:Title");
if (t == null) t = this.get("dc:title");
t = stripCDATA(t);
if (t == null) return "";
return t;
@ -122,7 +131,7 @@ public class DCEntry extends HashMap<String, String> {
public String author() {
String t = this.get("author");
if (t == null) t = this.get("dc:Creator");
if (t == null) t = this.get("dc:creator");
t = stripCDATA(t);
if (t == null) return "";
return t;
@ -130,7 +139,7 @@ public class DCEntry extends HashMap<String, String> {
public String body() {
String t = this.get("body");
if (t == null) t = this.get("dc:Description");
if (t == null) t = this.get("dc:description");
t = stripCDATA(t);
if (t == null) return "";
return t;
@ -138,7 +147,7 @@ public class DCEntry extends HashMap<String, String> {
/**
 * Returns the categories of this entry.
 * The value stored under "categories" is used; if there is none, the value stored
 * under "dc:Subject" / "dc:subject" is used instead. The value is passed through
 * stripCDATA and then split at ';'.
 *
 * @return the categories, or an empty array if no value is available
 */
public String[] categories() {
    String t = this.get("categories");
    // the fallback value has to be assigned to t; the lookup result used to be
    // discarded, so the subject field was never taken into account
    if (t == null) t = this.get("dc:Subject");
    if (t == null) t = this.get("dc:subject");
    t = stripCDATA(t);
    if (t == null) return new String[]{};
    return t.split(";");

@ -104,13 +104,11 @@ public class PhpBB3Dao implements Dao {
}
public Date first() {
StringBuilder sql = new StringBuilder(256);
sql.append("select min(post_time) from " + prefix + "posts");
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql.toString());
rs = stmt.executeQuery("select min(post_time) from " + prefix + "posts");
if (rs.next()) {
return new Date(rs.getLong(1) * 1000L);
}
@ -125,13 +123,11 @@ public class PhpBB3Dao implements Dao {
}
public Date latest() {
StringBuilder sql = new StringBuilder(256);
sql.append("select max(post_time) from " + prefix + "posts");
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql.toString());
rs = stmt.executeQuery("select max(post_time) from " + prefix + "posts");
if (rs.next()) {
return new Date(rs.getLong(1) * 1000L);
}
@ -165,10 +161,7 @@ public class PhpBB3Dao implements Dao {
}
/**
 * Looks up a single post by its id in the "posts" table (table name is built from
 * the configured prefix).
 *
 * @param item the post_id to select
 * @return the entry produced by getOne for that query
 */
public DCEntry get(int item) {
    // a single return remains: the StringBuilder-based variant left a second,
    // unreachable return statement behind. item is an int, so appending it to the
    // statement text cannot introduce SQL syntax.
    return getOne("select * from " + prefix + "posts where post_id = " + item);
}
public BlockingQueue<DCEntry> query(int from, int until, int queueSize) {
@ -198,12 +191,12 @@ public class PhpBB3Dao implements Dao {
}
private DCEntry getOne(StringBuilder sql) {
private DCEntry getOne(String sql) {
Statement stmt = null;
ResultSet rs = null;
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql.toString());
rs = stmt.executeQuery(sql);
if (rs.next()) {
try {
return parseResultSet(rs);

Loading…
Cancel
Save