git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4848 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 17 years ago
parent 25192e0d36
commit fd8bd5d0d1

@ -106,7 +106,7 @@ public class get {
while (it.hasNext()) { while (it.hasNext()) {
tag = it.next(); tag = it.next();
if(!tag.getTagName().startsWith("/")) { // ignore folder tags if(!tag.getTagName().startsWith("/")) { // ignore folder tags
prop.put("tags_"+count+"_name", tag.getTagName()); prop.putHTML("tags_"+count+"_name", tag.getTagName(), true);
prop.put("tags_"+count+"_count", tag.size()); prop.put("tags_"+count+"_count", tag.size());
count++; count++;
} }

@ -74,7 +74,7 @@ public class getpageinfo_p {
String url=(String) post.get("url"); String url=(String) post.get("url");
if(url.toLowerCase().startsWith("ftp://")){ if(url.toLowerCase().startsWith("ftp://")){
prop.put("robots-allowed", "1"); prop.put("robots-allowed", "1");
prop.putHTML("title", "FTP: "+url); prop.putHTML("title", "FTP: "+url, true);
return prop; return prop;
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) { } else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
url = "http://" + url; url = "http://" + url;
@ -93,7 +93,7 @@ public class getpageinfo_p {
writer.close(); writer.close();
// put the document title // put the document title
prop.putHTML("title", scraper.getTitle()); prop.putHTML("title", scraper.getTitle(), true);
// put the favicon that belongs to the document // put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString()); prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@ -108,7 +108,7 @@ public class getpageinfo_p {
i++; i++;
tag += " "+list[i]; tag += " "+list[i];
} }
prop.putHTML("tags_"+count+"_tag", tag); prop.putHTML("tags_"+count+"_tag", tag, true);
count++; count++;
} }
} }
@ -127,7 +127,7 @@ public class getpageinfo_p {
// get the sitemap URL of the domain // get the sitemap URL of the domain
yacyURL sitemapURL = robotsParser.getSitemapURL(theURL); yacyURL sitemapURL = robotsParser.getSitemapURL(theURL);
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString()); prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true);
} catch (MalformedURLException e) {} } catch (MalformedURLException e) {}
} }

@ -61,6 +61,7 @@ import java.util.Properties;
import javax.swing.event.EventListenerList; import javax.swing.event.EventListenerList;
import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient; import de.anomic.http.HttpClient;
import de.anomic.server.serverCharBuffer; import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils; import de.anomic.server.serverFileUtils;
@ -180,11 +181,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (tagname.equalsIgnoreCase("meta")) { if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", ""); String name = tagopts.getProperty("name", "");
if (name.length() > 0) { if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content","")); metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
} else { } else {
name = tagopts.getProperty("http-equiv", ""); name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) { if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content","")); metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
} }
} }
} }

Loading…
Cancel
Save