From e8ddd415a849fa3969c157caa4f491d5fbe3541f Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 4 Apr 2014 14:43:54 +0200 Subject: [PATCH] enhanced the new link structure graph --- htroot/api/linkstructure.java | 48 ++++++++++++++----- htroot/env/hypertree.css | 4 +- htroot/js/hypertree.js | 19 +++++--- .../net/yacy/search/schema/HyperlinkEdge.java | 2 +- 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/htroot/api/linkstructure.java b/htroot/api/linkstructure.java index 146382a07..e988c18f3 100644 --- a/htroot/api/linkstructure.java +++ b/htroot/api/linkstructure.java @@ -21,6 +21,7 @@ import java.net.MalformedURLException; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.Map; import java.util.concurrent.BlockingQueue; @@ -92,7 +93,9 @@ public class linkstructure { ); SolrDocument doc; Map errorDocs = new HashMap(); - Map edges = new HashMap(); + Map inboundEdges = new HashMap(); + Map outboundEdges = new HashMap(); + Map errorEdges = new HashMap(); try { while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) { String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); @@ -110,7 +113,7 @@ public class linkstructure { try { DigestURL linkurl = new DigestURL(link, null); String edgehash = ids + ASCII.String(linkurl.hash()); - edges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.InboundOk)); + inboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Inbound)); } catch (MalformedURLException e) {} } links = URIMetadataNode.getLinks(doc, false); // outbound @@ -119,26 +122,49 @@ public class linkstructure { try { DigestURL linkurl = new DigestURL(link, null); String edgehash = ids + ASCII.String(linkurl.hash()); - edges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Outbound)); + outboundEdges.put(edgehash, new HyperlinkEdge(from, linkurl, HyperlinkEdge.Type.Outbound)); } catch 
(MalformedURLException e) {} } } - if (edges.size() > maxnodes) break; + if (inboundEdges.size() + outboundEdges.size() > maxnodes) break; } } catch (InterruptedException e) { } catch (MalformedURLException e) { } // we use the errorDocs to mark all edges with endpoint to error documents - for (Map.Entry edge: edges.entrySet()) { - if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) edge.getValue().type = HyperlinkEdge.Type.Dead; + Iterator> i = inboundEdges.entrySet().iterator(); + Map.Entry edge; + while (i.hasNext()) { + edge = i.next(); + if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { + i.remove(); + edge.getValue().type = HyperlinkEdge.Type.Dead; + errorEdges.put(edge.getKey(), edge.getValue()); + } } - + i = outboundEdges.entrySet().iterator(); + while (i.hasNext()) { + edge = i.next(); + if (errorDocs.containsKey(edge.getValue().target.toNormalform(true))) { + i.remove(); + edge.getValue().type = HyperlinkEdge.Type.Dead; + errorEdges.put(edge.getKey(), edge.getValue()); + } + } + // we put all edges together in a specific order which is used to create nodes in an SVG display: + // nodes that appear first are possibly painted over by nodes coming later. + // less important nodes shall therefore appear first + Map edges = new LinkedHashMap(); + edges.putAll(outboundEdges); + edges.putAll(inboundEdges); + edges.putAll(errorEdges); + // finally just write out the edge array int c = 0; - for (Map.Entry edge: edges.entrySet()) { - prop.putJSON("list_" + c + "_source", edge.getValue().source.getPath()); - prop.putJSON("list_" + c + "_target", edge.getValue().type.equals(HyperlinkEdge.Type.Outbound) ? 
edge.getValue().target.toNormalform(true) : edge.getValue().target.getPath()); - prop.putJSON("list_" + c + "_type", edge.getValue().type.name()); + for (Map.Entry e: edges.entrySet()) { + prop.putJSON("list_" + c + "_source", e.getValue().source.getPath()); + prop.putJSON("list_" + c + "_target", e.getValue().type.equals(HyperlinkEdge.Type.Outbound) ? e.getValue().target.toNormalform(true) : e.getValue().target.getPath()); + prop.putJSON("list_" + c + "_type", e.getValue().type.name()); prop.put("list_" + c + "_eol", 1); c++; } diff --git a/htroot/env/hypertree.css b/htroot/env/hypertree.css index 74378461a..5175180bf 100644 --- a/htroot/env/hypertree.css +++ b/htroot/env/hypertree.css @@ -5,7 +5,7 @@ #Dead { fill: red; } -#InboundOk { +#Inbound { fill: green; } .hypertree { @@ -22,7 +22,7 @@ stroke: red; stroke-dasharray: 0,2 1; } -.hypertree-link.InboundOk { +.hypertree-link.Inbound { stroke: green; } circle { diff --git a/htroot/js/hypertree.js b/htroot/js/hypertree.js index 0b0450519..d05b931b1 100644 --- a/htroot/js/hypertree.js +++ b/htroot/js/hypertree.js @@ -3,13 +3,14 @@ function linkstructure(hostname, element, width, height, maxtime, maxnodes) { var links = []; $.getJSON("/api/linkstructure.json?about=" + hostname + "&maxtime=" + maxtime + "&maxnodes=" + maxnodes, function(links) { links.forEach(function(link) { - link.source = nodes[link.source] || (nodes[link.source] = {name: link.source}); - link.target = nodes[link.target] || (nodes[link.target] = {name: link.target}); + link.source = nodes[link.source] || (nodes[link.source] = {name: link.source, type:"Inbound"}); + link.target = nodes[link.target] || (nodes[link.target] = {name: link.target, type:link.type}); }); var force = d3.layout.force().nodes(d3.values(nodes)).links(links).size([width, height]).linkDistance(60).charge(-800).on("tick", tick).start(); + force.gravity(0.7); var svg = d3.select(element).append("svg").attr("id", "hypertree").attr("width", width).attr("height", height); 
svg.append("defs").selectAll("marker") - .data(["Dead", "Outbound", "InboundOk"]) + .data(["Dead", "Outbound", "Inbound"]) .enter().append("marker") .attr("id", function(d) { return d; }) .attr("viewBox", "0 -5 10 10") @@ -26,11 +27,15 @@ function linkstructure(hostname, element, width, height, maxtime, maxnodes) { svg.append("text").attr("x", 10).attr("y", height - 20).text("green: links to same domain").attr("style", "font-size:9px").attr("fill", "green"); svg.append("text").attr("x", 10).attr("y", height - 10).text("blue: links to other domains").attr("style", "font-size:9px").attr("fill", "lightblue"); svg.append("text").attr("x", 10).attr("y", height).text("red: dead links").attr("style", "font-size:9px").attr("fill", "red"); - var path = svg.append("g").selectAll("path").data(force.links()).enter().append("path").attr("class", - function(d) {return "hypertree-link " + d.type; }).attr("marker-end", function(d) { return "url(#" + d.type + ")";}); + var path = svg.append("g") + .selectAll("path").data(force.links()).enter().append("path") + .attr("class",function(d) {return "hypertree-link " + d.type; }) + .attr("marker-end", function(d) { return "url(#" + d.type + ")";}); var circle = svg.append("g").selectAll("circle").data(force.nodes()).enter().append("circle").attr("r", 4).call(force.drag); - var text = svg.append("g").selectAll("text").data(force.nodes()).enter().append("text").attr("x", 8).attr("y", ".31em").text( - function(d) {return d.name; }); + var text = svg.append("g") + .selectAll("text").data(force.nodes()).enter().append("text").attr("x", 8).attr("y", ".31em") + .attr("style", function(d) {return d.type == "Outbound" ? 
"fill:#888888;" : "fill:#000000;";}) + .text(function(d) {return d.name;}); function tick() { path.attr("d", linkArc); circle.attr("transform", transform); diff --git a/source/net/yacy/search/schema/HyperlinkEdge.java b/source/net/yacy/search/schema/HyperlinkEdge.java index 712cb9b4c..587699ae0 100644 --- a/source/net/yacy/search/schema/HyperlinkEdge.java +++ b/source/net/yacy/search/schema/HyperlinkEdge.java @@ -25,7 +25,7 @@ import net.yacy.cora.document.id.DigestURL; public class HyperlinkEdge { public static enum Type { - InboundOk, InboundNofollow, Outbound, Dead; + Inbound, InboundNofollow, Outbound, Dead; } public DigestURL source, target;