From b18273bbb2177de299f31abd6d64886f9d039838 Mon Sep 17 00:00:00 2001 From: low012 Date: Tue, 7 Nov 2006 20:42:37 +0000 Subject: [PATCH] *) YaCy will extract URLs from <embed> tags now. *) YaCy will extract URLs from <param> tags now if the name of the parameter is "movie". git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2934 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../htmlFilter/htmlFilterContentScraper.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 6196fabc0..54fd5252a 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -4,6 +4,8 @@ // first published on http://www.anomic.de // Frankfurt, Germany, 2004 // +// Contains contributions by Marc Nause [MN] +// // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ @@ -79,6 +81,8 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen linkTags0.add("meta"); linkTags0.add("area"); linkTags0.add("link"); + linkTags0.add("embed"); //added by [MN] + linkTags0.add("param"); //added by [MN] linkTags1 = new TreeSet(insensitiveCollator); linkTags1.add("a"); @@ -203,7 +207,18 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen } } } - + //start contrib [MN] + if (tagname.equalsIgnoreCase("embed")) { + anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name","")); + } + if (tagname.equalsIgnoreCase("param")) { + String name = tagopts.getProperty("name", ""); + if (name.equalsIgnoreCase("movie")) { + anchors.put(absolutePath(tagopts.getProperty("value", "")),name); + } + } + //end contrib [MN] + // fire event fireScrapeTag0(tagname, tagopts); }