*) htmlFilterContentScraper.java: adding support for link tag

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2633 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 16ba5d1b46
commit cc667b0aa5

@ -78,6 +78,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
linkTags0.add("frame");
linkTags0.add("meta");
linkTags0.add("area");
linkTags0.add("link");
linkTags1 = new TreeSet(insensitiveCollator);
linkTags1.add("a");
@ -171,6 +172,24 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
String href = tagopts.getProperty("href", "");
if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
}
if (tagname.equalsIgnoreCase("link")) {
    // <link> tag: register the referenced resource as an image (favicon) or as
    // an anchor, depending on the value of its rel attribute.
    String linkhref = tagopts.getProperty("href", "");
    URL newLink = null;
    // Skip tags without an href, like the href guard in the preceding branch;
    // otherwise the empty string would be resolved and registered as a link.
    if (linkhref.length() > 0) {
        try {
            newLink = new URL(absolutePath(linkhref));
        } catch (MalformedURLException e) {
            // Deliberately ignored: an unparsable href yields no usable link,
            // so newLink stays null and the tag is skipped below.
        }
    }
    if (newLink != null) {
        String type = tagopts.getProperty("rel", "");
        String linktitle = tagopts.getProperty("title", "");
        if (type.equalsIgnoreCase("shortcut icon")) {
            // NOTE(review): -1,-1 presumably mean "width/height unknown" - confirm
            // against htmlFilterImageEntry.
            htmlFilterImageEntry ie = new htmlFilterImageEntry(newLink, linktitle, -1, -1);
            images.add(ie);
        } else if (!type.equalsIgnoreCase("stylesheet") && !type.equalsIgnoreCase("alternate stylesheet")) {
            // Stylesheet links are dropped; every other rel value is recorded as an anchor.
            anchors.put(newLink.toString(), linktitle);
        }
    }
}
// fire event
fireScrapeTag0(tagname, tagopts);

@ -45,6 +45,7 @@
package de.anomic.server.logging;
import java.io.UnsupportedEncodingException;
import java.util.logging.ConsoleHandler;
import java.util.logging.Filter;
import java.util.logging.Formatter;
@ -88,6 +89,14 @@ public final class ConsoleOutErrHandler extends Handler{
String formatter = manager.getProperty(className + ".formatter");
setFormatter(makeFormatter(formatter));
// Read the "<className>.encoding" property and apply it to both wrapped handlers
// (stdout and stderr) so they use the same character encoding.
// NOTE(review): getProperty presumably returns null when the property is not set;
// confirm that setEncoding(null) on these handlers selects the platform default.
String encoding = manager.getProperty(className + ".encoding");
try {
this.stdOutHandler.setEncoding(encoding);
this.stdErrHandler.setEncoding(encoding);
} catch (UnsupportedEncodingException e) {
// An unsupported encoding name is only reported via the stack trace;
// configuration then continues with the remaining properties.
e.printStackTrace();
}
String ignoreCtrlChrStr = manager.getProperty(className + ".ignoreCtrlChr");
this.ignoreCtrlChr = (ignoreCtrlChrStr==null)?false:ignoreCtrlChrStr.equalsIgnoreCase("true");

Loading…
Cancel
Save