diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 0ba77481f..06ad0dae2 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -467,7 +467,8 @@ public class ContentScraper extends AbstractScraper implements Scraper { public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) { // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text)); if (tagname.equalsIgnoreCase("a") && text.length < 2048) { - final String href = tagopts.getProperty("href", EMPTY_STRING); + String href = tagopts.getProperty("href", EMPTY_STRING); + href = CharacterCoding.html2unicode(href); AnchorURL url; if ((href.length() > 0) && ((url = absolutePath(href)) != null)) { final String ext = MultiProtocolURL.getFileExtension(url.getFileName());