From cc667b0aa54b11395d2500222952f2a0696824fd Mon Sep 17 00:00:00 2001 From: theli Date: Tue, 19 Sep 2006 16:13:13 +0000 Subject: [PATCH] *) htmlFilterContentScraper.java: adding support for link tag git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2633 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../htmlFilter/htmlFilterContentScraper.java | 19 +++++++++++++++++++ .../server/logging/ConsoleOutErrHandler.java | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 5f5209d61..44f33cb81 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -78,6 +78,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen linkTags0.add("frame"); linkTags0.add("meta"); linkTags0.add("area"); + linkTags0.add("link"); linkTags1 = new TreeSet(insensitiveCollator); linkTags1.add("a"); @@ -171,6 +172,24 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen String href = tagopts.getProperty("href", ""); if (href.length() > 0) anchors.put(absolutePath(href), areatitle); } + if (tagname.equalsIgnoreCase("link")) { + URL newLink = null; + try { + newLink = new URL(absolutePath(tagopts.getProperty("href", ""))); + } catch (MalformedURLException e) {} + + if (newLink != null) { + String type = tagopts.getProperty("rel", ""); + String linktitle = tagopts.getProperty("title", ""); + + if (type.equalsIgnoreCase("shortcut icon")) { + htmlFilterImageEntry ie = new htmlFilterImageEntry(newLink, linktitle, -1,-1); + images.add(ie); + } else if (!type.equalsIgnoreCase("stylesheet") && !type.equalsIgnoreCase("alternate stylesheet")) { + anchors.put(newLink.toString(), linktitle); + } + } + } // fire event fireScrapeTag0(tagname, tagopts); diff --git a/source/de/anomic/server/logging/ConsoleOutErrHandler.java b/source/de/anomic/server/logging/ConsoleOutErrHandler.java index 4a8bbb1eb..0eb91cd0b 100644 --- a/source/de/anomic/server/logging/ConsoleOutErrHandler.java +++ b/source/de/anomic/server/logging/ConsoleOutErrHandler.java @@ -45,6 +45,7 @@ package de.anomic.server.logging; +import java.io.UnsupportedEncodingException; import java.util.logging.ConsoleHandler; import java.util.logging.Filter; import java.util.logging.Formatter; @@ -88,6 +89,14 @@ public final class ConsoleOutErrHandler extends Handler{ String formatter = manager.getProperty(className + ".formatter"); setFormatter(makeFormatter(formatter)); + String encoding = manager.getProperty(className + ".encoding"); + try { + this.stdOutHandler.setEncoding(encoding); + this.stdErrHandler.setEncoding(encoding); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + String ignoreCtrlChrStr = manager.getProperty(className + ".ignoreCtrlChr"); this.ignoreCtrlChr = (ignoreCtrlChrStr==null)?false:ignoreCtrlChrStr.equalsIgnoreCase("true");