Fixed handling of the HTML <base> tag (verified with www.burningboard.net/archive/index.html)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@356 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 1d3cfb380a
commit ec8a5ad060

@ -60,6 +60,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
static {
linkTags0 = new HashSet();
linkTags0.add("img");
linkTags0.add("base");
linkTags1 = new HashSet();
linkTags1.add("a");
@ -118,14 +119,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
/**
 * Processes a tag that is reported together with its attributes only.
 *
 * <ul>
 *   <li>{@code img}: records the {@code src} attribute (passed through
 *       {@code absolutePath}) in {@code images}, mapped to the {@code alt}
 *       attribute.</li>
 *   <li>{@code base}: replaces {@code root} with the URL given in the
 *       {@code href} attribute.</li>
 * </ul>
 * Missing attributes are treated as the empty string. All other tag names
 * are ignored.
 *
 * @param tagname name of the scraped tag
 * @param tagopts attributes of the scraped tag
 */
public void scrapeTag0(String tagname, Properties tagopts) {
    if (tagname.equals("img")) {
        final Object location = absolutePath(tagopts.getProperty("src", ""));
        final String altText = tagopts.getProperty("alt", "");
        images.put(location, altText);
        return;
    }

    if (!tagname.equals("base")) {
        return;
    }

    final String href = tagopts.getProperty("href", "");
    try {
        root = new URL(href);
    } catch (MalformedURLException ignored) {
        // An unparsable (or absent) href leaves the current root untouched.
    }
}
/**
 * Processes a tag that is reported together with its attributes and the
 * bytes enclosed by it.
 *
 * <ul>
 *   <li>{@code a} (enclosed text shorter than 2048 bytes): records the
 *       {@code href} attribute (passed through {@code absolutePath}) in
 *       {@code anchors}, mapped to the trimmed result of {@code stripAll}
 *       on the enclosed text.</li>
 *   <li>{@code h1} (shorter than 1024 bytes): stores the {@code stripAll}
 *       result in {@code headline}.</li>
 *   <li>{@code title} (shorter than 1024 bytes): stores the
 *       {@code stripAll} result in {@code title}.</li>
 *   <li>{@code base} (shorter than 512 bytes): replaces {@code root} with
 *       the URL given in the {@code href} attribute.</li>
 * </ul>
 * Missing attributes are treated as the empty string. Tags whose enclosed
 * text reaches the size limit, and all other tag names, are ignored.
 *
 * @param tagname name of the scraped tag
 * @param tagopts attributes of the scraped tag
 * @param text    raw bytes enclosed by the tag
 */
public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
    if (tagname.equals("a")) {
        if (text.length < 2048) {
            anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString());
        }
    } else if (tagname.equals("h1")) {
        if (text.length < 1024) {
            headline = super.stripAll(new serverByteBuffer(text)).toString();
        }
    } else if (tagname.equals("title")) {
        // The title is assigned exactly once; a second, identical assignment was redundant.
        if (text.length < 1024) {
            title = super.stripAll(new serverByteBuffer(text)).toString();
        }
    } else if (tagname.equals("base")) {
        if (text.length < 512) {
            try {
                root = new URL(tagopts.getProperty("href", ""));
            } catch (MalformedURLException ignored) {
                // An unparsable (or absent) href leaves the current root untouched.
            }
        }
    }
}

Loading…
Cancel
Save