From 57415b6889b3aee3871e37d6854e72bf82665d8d Mon Sep 17 00:00:00 2001 From: theli Date: Fri, 22 Sep 2006 05:40:29 +0000 Subject: [PATCH] *) Bugfix for surftipps UTF-8 problem See: http://www.yacy-forum.de/viewtopic.php?t=2864 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2647 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/index.java | 32 ++++++++++++++------------- source/de/anomic/yacy/yacyNewsDB.java | 30 +++++++++++++++---------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/htroot/index.java b/htroot/index.java index f2f96f121..2d3debbe8 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -208,11 +208,13 @@ public class index { while (k.hasNext()) { urlhash = (String) k.next(); if (urlhash == null) continue; + row = (kelondroRow.Entry) surftipps.get(urlhash); if (row == null) continue; + url = row.getColString(0, null); - title = row.getColString(1, null); - description = row.getColString(2, null); + title = row.getColString(1,"UTF-8"); + description = row.getColString(2,"UTF-8"); if ((url == null) || (title == null) || (description == null)) continue; refid = row.getColString(3, null); voted = false; @@ -292,7 +294,7 @@ public class index { entry = rowdef.newEntry(new byte[][]{ url.getBytes(), ((intention.length() == 0) ? 
record.attribute("startURL", "") : intention).getBytes(), - ("Crawl Start Point").getBytes(), + ("Crawl Start Point").getBytes("UTF-8"), record.id().getBytes() }); score = 2 + Math.min(10, intention.length() / 4) + timeFactor(record.created()); @@ -302,8 +304,8 @@ public class index { url = record.attribute("homepage", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - ("Home Page of " + record.attribute("nickname", "")).getBytes(), - ("Profile Update").getBytes(), + ("Home Page of " + record.attribute("nickname", "")).getBytes("UTF-8"), + ("Profile Update").getBytes("UTF-8"), record.id().getBytes() }); score = 1 + timeFactor(record.created()); @@ -313,8 +315,8 @@ public class index { url = record.attribute("url", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - (record.attribute("title", "")).getBytes(), - ("Bookmark: " + record.attribute("description", "")).getBytes(), + (record.attribute("title", "")).getBytes("UTF-8"), + ("Bookmark: " + record.attribute("description", "")).getBytes("UTF-8"), record.id().getBytes() }); score = 8 + timeFactor(record.created()); @@ -324,8 +326,8 @@ public class index { url = record.attribute("url", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - (record.attribute("title", "")).getBytes(), - ("Surf Tipp: " + record.attribute("description", "")).getBytes(), + (record.attribute("title", "")).getBytes("UTF-8"), + ("Surf Tipp: " + record.attribute("description", "")).getBytes("UTF-8"), record.id().getBytes() }); score = 5 + timeFactor(record.created()); @@ -336,8 +338,8 @@ public class index { url = record.attribute("url", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - record.attribute("title", "").getBytes(), - record.attribute("description", "").getBytes(), + record.attribute("title", "").getBytes("UTF-8"), + record.attribute("description", "").getBytes("UTF-8"), record.attribute("refid", "").getBytes() }); score = 5 + timeFactor(record.created()); @@ -350,8 +352,8 @@ public class 
index { url = "http://" + seed.getAddress() + "/Wiki.html?page=" + record.attribute("page", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - (record.attribute("author", "Anonymous") + ": " + record.attribute("page", "")).getBytes(), - ("Wiki Update: " + record.attribute("description", "")).getBytes(), + (record.attribute("author", "Anonymous") + ": " + record.attribute("page", "")).getBytes("UTF-8"), + ("Wiki Update: " + record.attribute("description", "")).getBytes("UTF-8"), record.id().getBytes() }); score = 4 + timeFactor(record.created()); @@ -365,8 +367,8 @@ public class index { url = "http://" + seed.getAddress() + "/Blog.html?page=" + record.attribute("page", ""); entry = rowdef.newEntry(new byte[][]{ url.getBytes(), - (record.attribute("author", "Anonymous") + ": " + record.attribute("page", "")).getBytes(), - ("Blog Entry: " + record.attribute("subject", "")).getBytes(), + (record.attribute("author", "Anonymous") + ": " + record.attribute("page", "")).getBytes("UTF-8"), + ("Blog Entry: " + record.attribute("subject", "")).getBytes("UTF-8"), record.id().getBytes() }); score = 4 + timeFactor(record.created()); diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index f8c9e13a4..5a5b61e9d 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -46,6 +46,7 @@ package de.anomic.yacy; import java.io.File; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Iterator; import de.anomic.yacy.yacyCore; @@ -157,24 +158,29 @@ public class yacyNewsDB { if (b == null) return null; return new yacyNewsRecord( b.getColString(0, null), - b.getColString(1, null), + b.getColString(1, "UTF-8"), (b.empty(2)) ? 
null : yacyCore.parseUniversalDate(b.getColString(2, null), serverDate.UTCDiffString()), (int) b.getColLong(3), - serverCodings.string2map(b.getColString(4, null)) + serverCodings.string2map(b.getColString(4, "UTF-8")) ); } protected final kelondroRow.Entry r2b(yacyNewsRecord r) { - if (r == null) return null; - String attributes = r.attributes().toString(); - if (attributes.length() > yacyNewsRecord.attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + yacyNewsRecord.attributesMaxLength); - kelondroRow.Entry entry = news.row().newEntry(); - entry.setCol(0, r.id().getBytes()); - entry.setCol(1, r.category().getBytes()); - entry.setCol(2, (r.received() == null) ? null : yacyCore.universalDateShortString(r.received()).getBytes()); - entry.setCol(3, kelondroBase64Order.enhancedCoder.encodeLong(r.distributed(), 2).getBytes()); - entry.setCol(4, attributes.getBytes()); - return entry; + try { + if (r == null) return null; + String attributes = r.attributes().toString(); + if (attributes.length() > yacyNewsRecord.attributesMaxLength) throw new IllegalArgumentException("attribute length=" + attributes.length() + " exceeds maximum size=" + yacyNewsRecord.attributesMaxLength); + kelondroRow.Entry entry = this.news.row().newEntry(); + entry.setCol(0, r.id().getBytes()); + entry.setCol(1, r.category().getBytes("UTF-8")); + entry.setCol(2, (r.received() == null) ? null : yacyCore.universalDateShortString(r.received()).getBytes()); + entry.setCol(3, kelondroBase64Order.enhancedCoder.encodeLong(r.distributed(), 2).getBytes()); + entry.setCol(4, attributes.getBytes("UTF-8")); + return entry; + } catch(UnsupportedEncodingException e) { + // ignore this. this should never occur + return null; + } } } \ No newline at end of file