*) Correcting Problems with htmlFilterContentScraper.java

Tag name comparison was case-sensitive, so tags written in uppercase, e.g.
      <A href="test.txt">test</A>
   were not parsed correctly.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@387 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 50d4c302c7
commit f57b60cd60

@ -125,15 +125,15 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
} }
public void scrapeTag0(String tagname, Properties tagopts) { public void scrapeTag0(String tagname, Properties tagopts) {
if (tagname.equals("img")) images.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("alt","")); if (tagname.equalsIgnoreCase("img")) images.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("alt",""));
if (tagname.equals("base")) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {} if (tagname.equalsIgnoreCase("base")) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {}
} }
public void scrapeTag1(String tagname, Properties tagopts, byte[] text) { public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
//System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text)); //System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text));
if ((tagname.equals("a")) && (text.length < 2048)) anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString()); if ((tagname.equalsIgnoreCase("a")) && (text.length < 2048)) anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString());
if ((tagname.equals("h1")) && (text.length < 1024)) headline = super.stripAll(new serverByteBuffer(text)).toString(); if ((tagname.equalsIgnoreCase("h1")) && (text.length < 1024)) headline = super.stripAll(new serverByteBuffer(text)).toString();
if ((tagname.equals("title")) && (text.length < 1024)) title = super.stripAll(new serverByteBuffer(text)).toString(); if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) title = super.stripAll(new serverByteBuffer(text)).toString();
} }

Loading…
Cancel
Save