From 1d3cfb380a85aa333e394a614cb1051718f3051b Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Thu, 30 Jun 2005 23:28:35 +0000
Subject: [PATCH] experimental implementation of <base> tag parsing in the HTML parser

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@355 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/htmlFilter/htmlFilterContentScraper.java | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index 3fb292c10..47c7afc44 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -123,8 +123,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
     public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
 	//System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text));
 	if ((tagname.equals("a")) && (text.length < 2048)) anchors.put(absolutePath(tagopts.getProperty("href", "")), super.stripAll(new serverByteBuffer(text)).trim().toString());
-	if ((tagname.equals("h1")) && (text.length < 512)) headline = super.stripAll(new serverByteBuffer(text)).toString();
-	if ((tagname.equals("title")) && (text.length < 512)) title = super.stripAll(new serverByteBuffer(text)).toString();
+	if ((tagname.equals("h1")) && (text.length < 1024)) headline = super.stripAll(new serverByteBuffer(text)).toString(); // h1 content shorter than 1024 bytes is stored as the headline (after super.stripAll)
+	if ((tagname.equals("title")) && (text.length < 1024)) title = super.stripAll(new serverByteBuffer(text)).toString(); // title content shorter than 1024 bytes is stored as the title (after super.stripAll)
+        if ((tagname.equals("base")) && (text.length < 512)) try {root = new URL(tagopts.getProperty("href", ""));} catch (MalformedURLException e) {/* best effort: a missing href defaults to "" and, like any malformed value, is ignored here, leaving root unchanged */} // base tag: its href replaces root. NOTE(review): presumably root is what absolutePath resolves links against - confirm
     }