diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java index c27ca9acc..663d61f22 100644 --- a/htroot/xml/util/getpageinfo_p.java +++ b/htroot/xml/util/getpageinfo_p.java @@ -88,16 +88,16 @@ public class getpageinfo_p { int count = 0; for(int i=0;i=0){ try { final yacyURL theURL = new yacyURL(url, null); - + // determine if crawling of the current URL is allowed prop.put("robots-allowed", sb.robots.isDisallowed(theURL) ? "0" : "1"); diff --git a/htroot/xml/util/getpageinfo_p.xml b/htroot/xml/util/getpageinfo_p.xml index a89d94140..34d2cbb05 100644 --- a/htroot/xml/util/getpageinfo_p.xml +++ b/htroot/xml/util/getpageinfo_p.xml @@ -1,6 +1,8 @@ #[title]# + #[desc]# + #[lang]# #(robots-allowed)#0::1::#(/robots-allowed)# #[sitemap]# #[favicon]# @@ -9,4 +11,4 @@ #{/tags}# - \ No newline at end of file + diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 4feab2a3a..91e5d0964 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -395,7 +395,8 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen if (s.length() == 0) { return getTitle().toLowerCase().split(splitrex); } - return s.split(" |,"); + if (s.contains(",")) return s.split(","); + return s.split("\\s"); } public int getRefreshSeconds() {