diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index b201baab4..0e325b3bd 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -42,6 +42,7 @@ import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Idiom; import net.yacy.document.ParserException; +import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.ScraperInputStream; import net.yacy.document.parser.html.TransformerWriter; @@ -260,6 +261,7 @@ public class htmlParser extends AbstractParser implements Idiom { Document document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content)); String title = document.dc_title(); System.out.println(title); + System.out.println(CharacterCoding.unicode2html(title, false)); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 907e3a2b7..d185e2988 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -376,13 +376,23 @@ public class URIMetadataRow implements URIMetadata { // parse elements from comp field; byte[] c = this.entry.getColBytes(col_comp, true); List cl = ByteBuffer.split(c, (byte) 10); - this.comp = new Components( + try { + this.comp = new Components( + (cl.size() > 0) ? new String(cl.get(0), "UTF-8") : "", + hash(), + (cl.size() > 1) ? new String(cl.get(1), "UTF-8") : "", + (cl.size() > 2) ? new String(cl.get(2), "UTF-8") : "", + (cl.size() > 3) ? new String(cl.get(3), "UTF-8") : "", + (cl.size() > 4) ? new String(cl.get(4), "UTF-8") : ""); + } catch (UnsupportedEncodingException e) { + this.comp = new Components( (cl.size() > 0) ? new String(cl.get(0)) : "", hash(), (cl.size() > 1) ? new String(cl.get(1)) : "", (cl.size() > 2) ? new String(cl.get(2)) : "", (cl.size() > 3) ? new String(cl.get(3)) : "", (cl.size() > 4) ? new String(cl.get(4)) : ""); + } return this.comp; }