|
|
|
@ -630,17 +630,21 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|
|
|
|
final String name = tag.opts.getProperty("name", EMPTY_STRING);
|
|
|
|
|
if (name.equalsIgnoreCase("movie")) {
|
|
|
|
|
AnchorURL url = absolutePath(tag.opts.getProperty("value", EMPTY_STRING));
|
|
|
|
|
tag.opts.put("value", url.toNormalform(true));
|
|
|
|
|
url.setAll(tag.opts);
|
|
|
|
|
this.addAnchor(url);
|
|
|
|
|
if(url != null) {
|
|
|
|
|
tag.opts.put("value", url.toNormalform(true));
|
|
|
|
|
url.setAll(tag.opts);
|
|
|
|
|
this.addAnchor(url);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (tag.name.equalsIgnoreCase("iframe")) {
|
|
|
|
|
final AnchorURL src = absolutePath(tag.opts.getProperty("src", EMPTY_STRING));
|
|
|
|
|
tag.opts.put("src", src.toNormalform(true));
|
|
|
|
|
src.setAll(tag.opts);
|
|
|
|
|
//this.addAnchor(src); // don't add the iframe to the anchors because the webgraph should not contain such links (by definition)
|
|
|
|
|
this.iframes.add(src);
|
|
|
|
|
this.evaluationScores.match(Element.iframepath, src.toNormalform(true));
|
|
|
|
|
if(src != null) {
|
|
|
|
|
tag.opts.put("src", src.toNormalform(true));
|
|
|
|
|
src.setAll(tag.opts);
|
|
|
|
|
// this.addAnchor(src); // don't add the iframe to the anchors because the webgraph should not contain such links (by definition)
|
|
|
|
|
this.iframes.add(src);
|
|
|
|
|
this.evaluationScores.match(Element.iframepath, src.toNormalform(true));
|
|
|
|
|
}
|
|
|
|
|
} else if (tag.name.equalsIgnoreCase("html")) {
|
|
|
|
|
final String lang = tag.opts.getProperty("lang", EMPTY_STRING);
|
|
|
|
|
if (!lang.isEmpty()) // fake a language meta to preserve detection from <html lang="xx" />
|
|
|
|
|