git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6923 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 3a1cebb598
commit de4f30bb2e

@ -42,6 +42,7 @@ import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Idiom;
import net.yacy.document.ParserException;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.ScraperInputStream;
import net.yacy.document.parser.html.TransformerWriter;
@ -260,6 +261,7 @@ public class htmlParser extends AbstractParser implements Idiom {
Document document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
String title = document.dc_title();
System.out.println(title);
System.out.println(CharacterCoding.unicode2html(title, false));
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {

@ -376,13 +376,23 @@ public class URIMetadataRow implements URIMetadata {
// parse the elements from the comp field
byte[] c = this.entry.getColBytes(col_comp, true);
List<byte[]> cl = ByteBuffer.split(c, (byte) 10);
this.comp = new Components(
try {
this.comp = new Components(
(cl.size() > 0) ? new String(cl.get(0), "UTF-8") : "",
hash(),
(cl.size() > 1) ? new String(cl.get(1), "UTF-8") : "",
(cl.size() > 2) ? new String(cl.get(2), "UTF-8") : "",
(cl.size() > 3) ? new String(cl.get(3), "UTF-8") : "",
(cl.size() > 4) ? new String(cl.get(4), "UTF-8") : "");
} catch (UnsupportedEncodingException e) {
this.comp = new Components(
(cl.size() > 0) ? new String(cl.get(0)) : "",
hash(),
(cl.size() > 1) ? new String(cl.get(1)) : "",
(cl.size() > 2) ? new String(cl.get(2)) : "",
(cl.size() > 3) ? new String(cl.get(3)) : "",
(cl.size() > 4) ? new String(cl.get(4)) : "");
}
return this.comp;
}

Loading…
Cancel
Save