From fd8bd5d0d1855d6555b67399011e93e43aec76b5 Mon Sep 17 00:00:00 2001 From: f1ori Date: Sat, 24 May 2008 16:12:16 +0000 Subject: [PATCH] * fix for http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1176 (encoding issue) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4848 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/xml/bookmarks/tags/get.java | 2 +- htroot/xml/util/getpageinfo_p.java | 8 ++++---- source/de/anomic/htmlFilter/htmlFilterContentScraper.java | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/htroot/xml/bookmarks/tags/get.java b/htroot/xml/bookmarks/tags/get.java index bf5818f0e..b2d3da023 100644 --- a/htroot/xml/bookmarks/tags/get.java +++ b/htroot/xml/bookmarks/tags/get.java @@ -106,7 +106,7 @@ public class get { while (it.hasNext()) { tag = it.next(); if(!tag.getTagName().startsWith("/")) { // ignore folder tags - prop.put("tags_"+count+"_name", tag.getTagName()); + prop.putHTML("tags_"+count+"_name", tag.getTagName(), true); prop.put("tags_"+count+"_count", tag.size()); count++; } diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java index aa9841ac6..f058313ed 100644 --- a/htroot/xml/util/getpageinfo_p.java +++ b/htroot/xml/util/getpageinfo_p.java @@ -74,7 +74,7 @@ public class getpageinfo_p { String url=(String) post.get("url"); if(url.toLowerCase().startsWith("ftp://")){ prop.put("robots-allowed", "1"); - prop.putHTML("title", "FTP: "+url); + prop.putHTML("title", "FTP: "+url, true); return prop; } else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) { url = "http://" + url; @@ -93,7 +93,7 @@ public class getpageinfo_p { writer.close(); // put the document title - prop.putHTML("title", scraper.getTitle()); + prop.putHTML("title", scraper.getTitle(), true); // put the favicon that belongs to the document prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString()); @@ -108,7 +108,7 @@ public class getpageinfo_p { i++; tag += " "+list[i]; } - prop.putHTML("tags_"+count+"_tag", tag); + prop.putHTML("tags_"+count+"_tag", tag, true); count++; } } @@ -127,7 +127,7 @@ public class getpageinfo_p { // get the sitemap URL of the domain yacyURL sitemapURL = robotsParser.getSitemapURL(theURL); - prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString()); + prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true); } catch (MalformedURLException e) {} } diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 467e9ef80..ff037033f 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -61,6 +61,7 @@ import java.util.Properties; import javax.swing.event.EventListenerList; +import de.anomic.data.htmlTools; import de.anomic.http.HttpClient; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; @@ -180,11 +181,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen if (tagname.equalsIgnoreCase("meta")) { String name = tagopts.getProperty("name", ""); if (name.length() > 0) { - metas.put(name.toLowerCase(), tagopts.getProperty("content","")); + metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content",""))); } else { name = tagopts.getProperty("http-equiv", ""); if (name.length() > 0) { - metas.put(name.toLowerCase(), tagopts.getProperty("content","")); + metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content",""))); } } }