small fixes to getpageinfo_p.xml and htmlFilterContentScraper.java with respect to keyword extraction

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5185 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 17 years ago
parent 029e16b653
commit 5e8bd0f29c

@@ -46,11 +46,18 @@ public class getpageinfo_p {
public static serverObjects respond(final httpRequestHeader header, final serverObjects post, final serverSwitch<?> env) {
final plasmaSwitchboard sb = (plasmaSwitchboard) env;
final serverObjects prop = new serverObjects();
prop.put("sitemap", "");
prop.put("title", "");
prop.put("favicon","");
// avoid UNRESOLVED PATTERN
prop.put("title", "");
prop.put("desc", "");
prop.put("lang", "");
prop.put("robots-allowed", "3"); //unknown
String actions="title";
prop.put("sitemap", "");
prop.put("favicon","");
// default actions
String actions="title,robots";
if(post!=null && post.containsKey("url")){
if(post.containsKey("actions"))
actions=post.get("actions");

@@ -395,7 +395,8 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (s.length() == 0) {
return getTitle().toLowerCase().split(splitrex);
}
if (s.contains(",")) return s.split(",");
if (s.contains(",")) return s.split(" |,");
if (s.contains(";")) return s.split(" |;");
return s.split("\\s");
}

Loading…
Cancel
Save