From ec8a5ad0609ba57d03b9e9132e31091de8df9c95 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 30 Jun 2005 23:49:32 +0000 Subject: [PATCH] fixed base implementation (verified with www.burningboard.net/archive/index.html) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@356 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/htmlFilter/htmlFilterContentScraper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 47c7afc44..89d5e6783 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -60,6 +60,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen static { linkTags0 = new HashSet(); linkTags0.add("img"); + linkTags0.add("base"); linkTags1 = new HashSet(); linkTags1.add("a"); @@ -118,14 +119,14 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen public void scrapeTag0(String tagname, Properties tagopts) { if (tagname.equals("img")) images.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("alt","")); + if (tagname.equals("base")) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {} } public void scrapeTag1(String tagname, Properties tagopts, byte[] text) { //System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text)); if ((tagname.equals("a")) && (text.length < 2048)) anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString()); if ((tagname.equals("h1")) && (text.length < 1024)) headline = super.stripAll(new serverByteBuffer(text)).toString(); - if ((tagname.equals("title")) && (text.length < 1024)) title = super.stripAll(new serverByteBuffer(text)).toString(); - if ((tagname.equals("base")) && (text.length < 512)) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {} + if ((tagname.equals("title")) && (text.length < 1024)) title = super.stripAll(new serverByteBuffer(text)).toString(); }