diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 9b6b8794a..c6e84ad78 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -500,7 +500,7 @@ public class plasmaCrawlNURL extends indexURL { this.initiator = entry.getColString(1, null); this.url = new URL(urlstring); this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null); - this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim(); + this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.loaddate = new Date(86400000 * entry.getColLong(5)); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); this.depth = (int) entry.getColLong(7); @@ -523,7 +523,7 @@ public class plasmaCrawlNURL extends indexURL { (initiator == null) ? "".getBytes() : this.initiator.getBytes(), this.url.toString().getBytes(), this.referrer.getBytes(), - this.name.getBytes(), + this.name.getBytes("UTF-8"), loaddatestr.getBytes(), (this.profileHandle == null) ? null : this.profileHandle.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong(this.depth, urlCrawlDepthLength).getBytes(), diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 1b65bcaad..c3828b16d 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -48,6 +48,7 @@ package de.anomic.plasma; import java.io.File; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.net.InetAddress; import java.net.MalformedURLException; import java.util.Date; @@ -560,12 +561,14 @@ public final class plasmaCrawlStacker { // store the hash in the hash cache // even if the entry exists, we simply overwrite it - byte[][] entry = new byte[][] { + byte[][] entry = null; + try { + entry = new byte[][] { this.urlHash.getBytes(), (this.initiator == null) ? "".getBytes() : this.initiator.getBytes(), this.url.getBytes(), this.referrerHash.getBytes(), - this.name.getBytes(), + this.name.getBytes("UTF-8"), loaddatestr.getBytes(), (this.profileHandle == null) ? null : this.profileHandle.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexURL.urlCrawlDepthLength).getBytes(), @@ -574,6 +577,7 @@ public final class plasmaCrawlStacker { this.flags.getBytes(), normalizeHandle(this.handle).getBytes() }; + } catch (UnsupportedEncodingException e) { /* ignore this */ } return entry; } diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index a75055aaa..51d66e748 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -110,7 +110,7 @@ public class plasmaSwitchboardQueue { (entry.initiator == null) ? indexURL.dummyHash.getBytes() : entry.initiator.getBytes(), kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, indexURL.urlCrawlDepthLength).getBytes(), (entry.profileHandle == null) ? indexURL.dummyHash.getBytes() : entry.profileHandle.getBytes(), - (entry.anchorName == null) ? "-".getBytes() : entry.anchorName.getBytes() + (entry.anchorName == null) ? "-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8") })); }