diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 4901fe73a..c207a2948 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -77,6 +77,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen linkTags0.add("base"); linkTags0.add("frame"); linkTags0.add("meta"); + linkTags0.add("area"); linkTags1 = new TreeSet(insensitiveCollator); linkTags1.add("a"); @@ -180,8 +181,12 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen images.add(ie); } catch (MalformedURLException e) {} } - if (tagname.equalsIgnoreCase("base")) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {} - if (tagname.equalsIgnoreCase("frame")) anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name","")); + if (tagname.equalsIgnoreCase("base")) try { + root = new URL(tagopts.getProperty("href", "")); + } catch (MalformedURLException e) {} + if (tagname.equalsIgnoreCase("frame")) { + anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name","")); + } if (tagname.equalsIgnoreCase("meta")) { String name = tagopts.getProperty("name", ""); if (name.length() > 0) { @@ -194,11 +199,20 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen return; } } + if (tagname.equalsIgnoreCase("area")) { + String title = cleanLine(tagopts.getProperty("title","")); + //String alt = tagopts.getProperty("alt",""); + String href = tagopts.getProperty("href", ""); + if (href.length() > 0) anchors.put(absolutePath(href), title); + } } public void scrapeTag1(String tagname, Properties tagopts, byte[] text) { // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text)); - if ((tagname.equalsIgnoreCase("a")) && (text.length < 2048)) anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString()); + if ((tagname.equalsIgnoreCase("a")) && (text.length < 2048)) { + String href = tagopts.getProperty("href", ""); + if (href.length() > 0) anchors.put(absolutePath(href), super.stripAll(new serverByteBuffer(text)).trim().toString()); + } String h; if ((tagname.equalsIgnoreCase("h1")) && (text.length < 1024)) { h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());