*) Bugfix for UTF-8: URL names are now stored properly in stackcrawl, crawler, and indexing queue, and should be displayed correctly in the GUI

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2630 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent cf6acff2c2
commit 26dfbb7499

@ -500,7 +500,7 @@ public class plasmaCrawlNURL extends indexURL {
this.initiator = entry.getColString(1, null);
this.url = new URL(urlstring);
this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim();
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.loaddate = new Date(86400000 * entry.getColLong(5));
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
this.depth = (int) entry.getColLong(7);
@ -523,7 +523,7 @@ public class plasmaCrawlNURL extends indexURL {
(initiator == null) ? "".getBytes() : this.initiator.getBytes(),
this.url.toString().getBytes(),
this.referrer.getBytes(),
this.name.getBytes(),
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
kelondroBase64Order.enhancedCoder.encodeLong(this.depth, urlCrawlDepthLength).getBytes(),

@ -48,6 +48,7 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.Date;
@ -560,12 +561,14 @@ public final class plasmaCrawlStacker {
// store the hash in the hash cache
// even if the entry exists, we simply overwrite it
byte[][] entry = new byte[][] {
byte[][] entry = null;
try {
entry = new byte[][] {
this.urlHash.getBytes(),
(this.initiator == null) ? "".getBytes() : this.initiator.getBytes(),
this.url.getBytes(),
this.referrerHash.getBytes(),
this.name.getBytes(),
this.name.getBytes("UTF-8"),
loaddatestr.getBytes(),
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
kelondroBase64Order.enhancedCoder.encodeLong(this.depth, indexURL.urlCrawlDepthLength).getBytes(),
@ -574,6 +577,7 @@ public final class plasmaCrawlStacker {
this.flags.getBytes(),
normalizeHandle(this.handle).getBytes()
};
} catch (UnsupportedEncodingException e) { /* ignore this */ }
return entry;
}

@ -110,7 +110,7 @@ public class plasmaSwitchboardQueue {
(entry.initiator == null) ? indexURL.dummyHash.getBytes() : entry.initiator.getBytes(),
kelondroBase64Order.enhancedCoder.encodeLong((long) entry.depth, indexURL.urlCrawlDepthLength).getBytes(),
(entry.profileHandle == null) ? indexURL.dummyHash.getBytes() : entry.profileHandle.getBytes(),
(entry.anchorName == null) ? "-".getBytes() : entry.anchorName.getBytes()
(entry.anchorName == null) ? "-".getBytes("UTF-8") : entry.anchorName.getBytes("UTF-8")
}));
}

Loading…
Cancel
Save