Remove mailto link collection from the internal parsed document

As earlier plans to use mailto links as a separate webgraph entity didn't
materialize (see http://forum.yacy-websuche.de/viewtopic.php?f=8&t=5726&p=32493&hilit=mailto#p32493),
remove the unused handling and free the associated resources.
pull/122/head
reger 8 years ago
parent 335868edba
commit c77e43a391

@ -424,23 +424,6 @@ public class ViewFile {
boolean dark = true; boolean dark = true;
int i = 0; int i = 0;
if (document.getEmaillinks() != null) {
Iterator<AnchorURL> emailit = document.getEmaillinks().iterator();
while (emailit.hasNext()) {
AnchorURL eentry = emailit.next();
prop.put("viewMode_links_" + i + "_nr", i);
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "email");
prop.put("viewMode_links_" + i + "_text", (eentry.getTextProperty().isEmpty()) ? "&nbsp;" : eentry.getTextProperty());
prop.put("viewMode_links_" + i + "_url", "#");
prop.put("viewMode_links_" + i + "_link", eentry.toNormalform(true));
prop.put("viewMode_links_" + i + "_rel", "");
prop.put("viewMode_links_" + i + "_name", eentry.getNameProperty());
dark = !dark;
i++;
}
}
i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0)); i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0));
i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0)); i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
dark = (i % 2 == 0); dark = (i % 2 == 0);

@ -86,7 +86,6 @@ public class Document {
// text in image tags. // text in image tags.
private LinkedHashMap<AnchorURL, String> audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed private LinkedHashMap<AnchorURL, String> audiolinks, videolinks, applinks, hyperlinks; // TODO: check if redundant value (set to key.getNameProperty()) is needed
private LinkedHashMap<DigestURL, String> inboundlinks, outboundlinks; private LinkedHashMap<DigestURL, String> inboundlinks, outboundlinks;
private Set<AnchorURL> emaillinks; // mailto: links
/** links to icons that belongs to the document (mapped by absolute URL) */ /** links to icons that belongs to the document (mapped by absolute URL) */
private Map<DigestURL, IconEntry> icons; private Map<DigestURL, IconEntry> icons;
private boolean resorted; private boolean resorted;
@ -141,7 +140,6 @@ public class Document {
this.audiolinks = null; this.audiolinks = null;
this.videolinks = null; this.videolinks = null;
this.applinks = null; this.applinks = null;
this.emaillinks = null;
this.icons = new HashMap<>(); this.icons = new HashMap<>();
this.resorted = false; this.resorted = false;
this.inboundlinks = null; this.inboundlinks = null;
@ -520,17 +518,9 @@ dc_rights
return this.applinks; return this.applinks;
} }
/**
* @return mailto links
*/
public Set<AnchorURL> getEmaillinks() {
// this is part of the getAnchor-set: only links to email addresses
if (!this.resorted) resortLinks();
return this.emaillinks;
}
/** /**
* @return last modification date of the source document * @return last modification date of the source document. (The date is initialized with last modification date or received date)
*/ */
public Date getLastModified() { public Date getLastModified() {
return this.lastModified; return this.lastModified;
@ -551,7 +541,7 @@ dc_rights
if (this.resorted) return; if (this.resorted) return;
synchronized (this) { synchronized (this) {
if (this.resorted) return; if (this.resorted) return;
// extract hyperlinks, medialinks and emaillinks from anchorlinks // extract hyperlinks, medialinks from anchorlinks
String u; String u;
int extpos, qpos; int extpos, qpos;
String ext = null; String ext = null;
@ -562,7 +552,6 @@ dc_rights
this.videolinks = new LinkedHashMap<AnchorURL, String>(); this.videolinks = new LinkedHashMap<AnchorURL, String>();
this.audiolinks = new LinkedHashMap<AnchorURL, String>(); this.audiolinks = new LinkedHashMap<AnchorURL, String>();
this.applinks = new LinkedHashMap<AnchorURL, String>(); this.applinks = new LinkedHashMap<AnchorURL, String>();
this.emaillinks = new LinkedHashSet<AnchorURL>();
final Map<AnchorURL, ImageEntry> collectedImages = new HashMap<AnchorURL, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks final Map<AnchorURL, ImageEntry> collectedImages = new HashMap<AnchorURL, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
for (final Map.Entry<DigestURL, ImageEntry> entry: this.images.entrySet()) { for (final Map.Entry<DigestURL, ImageEntry> entry: this.images.entrySet()) {
if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image"); if (entry.getKey() != null && entry.getKey().getHost() != null && entry.getKey().getHost().equals(thishost)) this.inboundlinks.put(entry.getKey(), "image"); else this.outboundlinks.put(entry.getKey(), "image");
@ -571,11 +560,6 @@ dc_rights
if (url == null) continue; if (url == null) continue;
u = url.toNormalform(true); u = url.toNormalform(true);
final String name = url.getNameProperty(); final String name = url.getNameProperty();
// check mailto scheme first (not suppose to get into in/outboundlinks or hyperlinks -> crawler can't process)
if (url.getProtocol().equals("mailto")) {
this.emaillinks.add(url);
continue;
}
final boolean noindex = url.getRelProperty().toLowerCase().indexOf("noindex",0) >= 0; final boolean noindex = url.getRelProperty().toLowerCase().indexOf("noindex",0) >= 0;
final boolean nofollow = url.getRelProperty().toLowerCase().indexOf("nofollow",0) >= 0; final boolean nofollow = url.getRelProperty().toLowerCase().indexOf("nofollow",0) >= 0;

Loading…
Cancel
Save