Do not write frame links to the webgraph

pull/1/head
Michael Peter Christen 10 years ago
parent 4eb89d7f15
commit 4144c7cc52

@ -62,7 +62,6 @@ import net.yacy.cora.util.ByteBuffer;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.EmbedEntry;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.util.FileUtils;
@ -914,6 +913,7 @@ dc_rights
/** Marker value stored in the hyperlink result map for a document's canonical URL. */
public final static String CANONICAL_MARKER = "canonical";
/** Marker value stored in the hyperlink result map for every URL returned by {@code html.getIFrames()}. */
public final static String IFRAME_MARKER = "iframe";
/** Marker value stored in the hyperlink result map for every URL returned by {@code html.getFrames()}. */
public final static String FRAME_MARKER = "frame";
/** Marker value stored in the hyperlink result map for every key of {@code html.getEmbeds()}. */
public final static String EMBED_MARKER = "embed";
public static Map<AnchorURL, String> getHyperlinks(final Document[] documents, boolean includeNofollow) {
@ -935,6 +935,7 @@ dc_rights
if (canonical != null) {
result.put(canonical, CANONICAL_MARKER);
}
for (AnchorURL u: html.getFrames()) result.put(u, FRAME_MARKER);
for (AnchorURL u: html.getIFrames()) result.put(u, IFRAME_MARKER);
for (AnchorURL u: html.getEmbeds().keySet()) result.put(u, EMBED_MARKER);
}

@ -388,7 +388,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final AnchorURL src = absolutePath(tag.opts.getProperty("src", EMPTY_STRING));
tag.opts.put("src", src.toNormalform(true));
src.setAll(tag.opts);
this.anchors.add(src);
//this.anchors.add(src); // don't add the frame to the anchors because the webgraph should not contain such links (by definition)
this.frames.add(src);
this.evaluationScores.match(Element.framepath, src.toNormalform(true));
} else if (tag.name.equalsIgnoreCase("body")) {

Loading…
Cancel
Save