From 0c5548a7ff07a111def941886fc5b71f75ddb2e3 Mon Sep 17 00:00:00 2001 From: reger Date: Fri, 18 Dec 2015 02:35:44 +0100 Subject: [PATCH] fix (todo) remove redundant holding of email link nameproperty in parser document --- htroot/ViewFile.java | 10 +++++----- source/net/yacy/document/Document.java | 13 ++++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 237ad8d86..54e1bb1f3 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -328,17 +328,17 @@ public class ViewFile { int i = 0; if (document.getEmaillinks() != null) { - Iterator> emailit = document.getEmaillinks().entrySet().iterator(); + Iterator emailit = document.getEmaillinks().iterator(); while (emailit.hasNext()) { - Entry eentry = emailit.next(); + AnchorURL eentry = emailit.next(); prop.put("viewMode_links_" + i + "_nr", i); prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0"); prop.put("viewMode_links_" + i + "_type", "email"); - prop.put("viewMode_links_" + i + "_text", (eentry.getValue().isEmpty()) ? " " : eentry.getValue()); + prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? " " : eentry.getTextProperty()); prop.put("viewMode_links_" + i + "_url", "#"); - prop.put("viewMode_links_" + i + "_link", eentry.getKey()); + prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true)); prop.put("viewMode_links_" + i + "_rel", ""); - prop.put("viewMode_links_" + i + "_name", ""); + prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty()); dark = !dark; i++; } diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 57c58ee37..d58a2bca5 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -83,9 +83,9 @@ public class Document { // the anchors and images - Maps are URL-to-EntityDescription mappings. // The EntityDescription appear either as visible text in anchors or as alternative // text in image tags. - private LinkedHashMap audiolinks, videolinks, applinks, hyperlinks; + private LinkedHashMap audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed private LinkedHashMap inboundlinks, outboundlinks; - private Map emaillinks; + private Set emaillinks; // mailto: links private MultiProtocolURL favicon; private boolean resorted; private final Set languages; @@ -473,7 +473,10 @@ dc_rights return this.applinks; } - public Map getEmaillinks() { + /** + * @return mailto links + */ + public Set getEmaillinks() { // this is part of the getAnchor-set: only links to email addresses if (!this.resorted) resortLinks(); return this.emaillinks; @@ -509,7 +512,7 @@ dc_rights this.videolinks = new LinkedHashMap(); this.audiolinks = new LinkedHashMap(); this.applinks = new LinkedHashMap(); - this.emaillinks = new LinkedHashMap(); + this.emaillinks = new LinkedHashSet(); final Map collectedImages = new HashMap(); // this is a set that is collected now and joined later to the imagelinks for (final Map.Entry entry: this.images.entrySet()) { if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image"); @@ -520,7 +523,7 @@ dc_rights final String name = url.getNameProperty(); // check mailto scheme first (not suppose to get into in/outboundlinks or hyperlinks -> crawler can't process) if (url.getProtocol().equals("mailto")) { - this.emaillinks.put(u.substring(7), name); // TODO: check why key as string instead of Disgest/AnchorURL + this.emaillinks.add(url); continue; }