enhanced webstructure image: introduced

- multiple hosts can be listed (comma-separated) as host argument
- new 'bf'-attribute (branch factor): the maximum number of edges per
node
- the bf-value is computed automatically
- ordering of nodes when the graphic is drawn: the drawing usually ends
because of a limitation, e.g. the maximum number of nodes. When this happens,
it should be ensured that the more 'interesting' nodes are painted first. This is
now done by sorting all nodes by the number of links they have in the
distant sub-graph.
pull/1/head
Michael Peter Christen 13 years ago
parent 47ae7e322e
commit 39317a6c66

@ -25,15 +25,18 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.AbstractMap;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.peers.graphics.WebStructureGraph;
@ -41,6 +44,7 @@ import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects; import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch; import net.yacy.server.serverSwitch;
import net.yacy.visualization.GraphPlotter; import net.yacy.visualization.GraphPlotter;
import net.yacy.visualization.GraphPlotter.Point;
import net.yacy.visualization.PrintTool; import net.yacy.visualization.PrintTool;
import net.yacy.visualization.RasterPlotter; import net.yacy.visualization.RasterPlotter;
@ -61,17 +65,25 @@ public class WebStructurePicture_p {
int height = 576; int height = 576;
int depth = 3; int depth = 3;
int nodes = 300; // maximum number of host nodes that are painted int nodes = 300; // maximum number of host nodes that are painted
int bf = 12; // maximum number of branches around nodes; less nodes makes the graphic look more structured
int time = -1; int time = -1;
String host = null; String hosts = null;
int cyc = 0; int cyc = 0;
if (post != null) { if (post != null) {
width = post.getInt("width", 1024); width = post.getInt("width", 1024);
if (width < 32 ) width = 32;
if (width > 10000) width = 10000;
height = post.getInt("height", 576); height = post.getInt("height", 576);
if (height < 24) height = 24;
if (height > 10000) height = 10000;
depth = post.getInt("depth", 3); depth = post.getInt("depth", 3);
if (depth > 8) depth = 8;
if (depth < 0) depth = 0;
nodes = post.getInt("nodes", width * height * 100 / 1024 / 576); nodes = post.getInt("nodes", width * height * 100 / 1024 / 576);
bf = post.getInt("bf", depth <= 0 ? -1 : (int) Math.round(2.0d * Math.pow(nodes, 1.0d / depth)));
time = post.getInt("time", -1); time = post.getInt("time", -1);
host = post.get("host", null); hosts = post.get("host", null);
color_text = post.get("colortext", color_text); color_text = post.get("colortext", color_text);
color_back = post.get("colorback", color_back); color_back = post.get("colorback", color_back);
color_dot = post.get("colordot", color_dot); color_dot = post.get("colordot", color_dot);
@ -80,41 +92,36 @@ public class WebStructurePicture_p {
cyc = post.getInt("cyc", 0); cyc = post.getInt("cyc", 0);
} }
// too small values lead to an error, too big to huge CPU/memory consumption, resulting in possible DOS.
if (width < 32 ) width = 32;
if (width > 10000) width = 10000;
if (height < 24) height = 24;
if (height > 10000) height = 10000;
if (depth > 8) depth = 8;
if (depth < 0) depth = 0;
// calculate target time // calculate target time
final long timeout = (time < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + (time * 8 / 10); final long timeout = (time < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + (time * 8 / 10);
// find start point // find start point
if ((host == null) || (host.isEmpty()) || (host.equals("auto"))) { if (hosts == null || hosts.isEmpty() || hosts.equals("auto")) {
// find domain with most references // find domain with most references
host = sb.webStructure.hostWithMaxReferences(); hosts = sb.webStructure.hostWithMaxReferences();
} }
final RasterPlotter graphPicture; final RasterPlotter graphPicture;
if (host == null) { if (hosts == null) {
// probably no information available // probably no information available
final RasterPlotter.DrawMode drawMode = (RasterPlotter.darkColor(color_back)) ? RasterPlotter.DrawMode.MODE_ADD : RasterPlotter.DrawMode.MODE_SUB; final RasterPlotter.DrawMode drawMode = (RasterPlotter.darkColor(color_back)) ? RasterPlotter.DrawMode.MODE_ADD : RasterPlotter.DrawMode.MODE_SUB;
graphPicture = new RasterPlotter(width, height, drawMode, color_back); graphPicture = new RasterPlotter(width, height, drawMode, color_back);
PrintTool.print(graphPicture, width / 2, height / 2, 0, "NO WEB STRUCTURE DATA AVAILABLE.", 0); PrintTool.print(graphPicture, width / 2, height / 2, 0, "NO WEB STRUCTURE DATA AVAILABLE.", 0);
PrintTool.print(graphPicture, width / 2, height / 2 + 16, 0, "START A WEB CRAWL TO OBTAIN STRUCTURE DATA.", 0); PrintTool.print(graphPicture, width / 2, height / 2 + 16, 0, "START A WEB CRAWL TO OBTAIN STRUCTURE DATA.", 0);
} else { } else {
// find start hash
String hash = null;
if (host != null && host.length() > 0) try {
hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);
} catch (final MalformedURLException e) {Log.logException(e);}
//assert (sb.webStructure.outgoingReferences(hash) != null);
// recursively find domains, up to a specific depth // recursively find domains, up to a specific depth
GraphPlotter graph = new GraphPlotter(); GraphPlotter graph = new GraphPlotter();
if (host != null && hash != null) place(graph, sb.webStructure, hash, host, nodes, timeout, 0.0, 0.0, 0, depth, cyc); String[] hostlist = hosts.split(",");
//graph.print(); for (int i = 0; i < hostlist.length; i++) {
String host = hostlist[i];
String hash = null;
try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
Map.Entry<String, String> centernode = new AbstractMap.SimpleEntry<String, String>(hash, host);
double angle = 2.0d * i * Math.PI / hostlist.length - Math.PI / hostlist.length;
if (hostlist.length == 3) angle -= Math.PI / 2;
if (hostlist.length == 4) angle += Math.PI / 4;
graph.addNode(centernode.getValue(), Math.cos(angle) / 8, Math.sin(angle) / 8, 0);
place(graph, sb.webStructure, centernode, bf, nodes, timeout, hostlist.length == 1 ? 0 : 1, hostlist.length == 1 ? depth : depth + 1, cyc);
}
// apply physics to it to get a better shape // apply physics to it to get a better shape
if (post != null && post.containsKey("pa")) { if (post != null && post.containsKey("pa")) {
@ -130,69 +137,57 @@ public class WebStructurePicture_p {
// print headline // print headline
graphPicture.setColor(color_text); graphPicture.setColor(color_text);
PrintTool.print(graphPicture, 2, 8, 0, "YACY WEB-STRUCTURE ANALYSIS", -1); PrintTool.print(graphPicture, 2, 8, 0, "YACY WEB-STRUCTURE ANALYSIS", -1);
if (host != null) PrintTool.print(graphPicture, 2, 16, 0, "LINK ENVIRONMENT OF DOMAIN " + host.toUpperCase(), -1); if (hosts != null) PrintTool.print(graphPicture, 2, 16, 0, "LINK ENVIRONMENT OF DOMAIN " + hosts.toUpperCase(), -1);
PrintTool.print(graphPicture, width - 2, 8, 0, "SNAPSHOT FROM " + new Date().toString().toUpperCase(), 1); PrintTool.print(graphPicture, width - 2, 8, 0, "SNAPSHOT FROM " + new Date().toString().toUpperCase(), 1);
return graphPicture; return graphPicture;
} }
private static final int place( private static final int place(
final GraphPlotter graph, final WebStructureGraph structure, final String centerhash, final String centerhost, final GraphPlotter graph, final WebStructureGraph structure, Map.Entry<String, String> pivotnode,
int maxnodes, final long timeout, final double x, final double y, int nextlayer, final int maxlayer, int bf, int maxnodes, final long timeout, int nextlayer, final int maxlayer, final int cyc) {
final int cyc) { Point pivotpoint = graph.getNode(pivotnode.getValue());
// returns the number of nodes that had been placed int branches = 0;
assert centerhost != null; if (nextlayer == maxlayer) return branches;
final GraphPlotter.Point center = graph.getNode(centerhost);
int mynodes = 0;
if (center == null) {
graph.addNode(centerhost, x, y, nextlayer);
maxnodes--;
mynodes++;
}
if (nextlayer == maxlayer) return mynodes;
nextlayer++; nextlayer++;
final double radius = 1.0 / (1 << nextlayer); final double radius = 1.0 / (1 << nextlayer);
final WebStructureGraph.StructureEntry sr = structure.outgoingReferences(centerhash); final WebStructureGraph.StructureEntry sr = structure.outgoingReferences(pivotnode.getKey());
final Map<String, Integer> next = (sr == null) ? new HashMap<String, Integer>() : sr.references; final Map<String, Integer> next = (sr == null) ? new HashMap<String, Integer>() : sr.references;
String targethash, targethost; ClusteredScoreMap<String> next0 = new ClusteredScoreMap<String>();
for (Map.Entry<String, Integer> entry: next.entrySet()) next0.set(entry.getKey(), entry.getValue());
// first set points to next hosts // first set points to next hosts
final List<String[]> targets = new ArrayList<String[]>(); final List<Map.Entry<String, String>> targets = new ArrayList<Map.Entry<String, String>>();
int maxtargetrefs = 8, maxthisrefs = 8; int maxtargetrefs = 8, maxthisrefs = 8;
int targetrefs, thisrefs; int targetrefs, thisrefs;
double rr, re; double rr, re;
for (Map.Entry<String, Integer> entry: next.entrySet()) { Iterator<String> i = next0.keys(false);
targethash = entry.getKey(); while (i.hasNext()) {
targethost = structure.hostHash2hostName(targethash); String targethash = i.next();
String targethost = structure.hostHash2hostName(targethash);
if (targethost == null) continue; if (targethost == null) continue;
thisrefs = entry.getValue().intValue(); thisrefs = next.get(targethash).intValue();
targetrefs = structure.referencesCount(targethash); // can be cpu/time-critical targetrefs = structure.referencesCount(targethash); // can be cpu/time-critical
maxtargetrefs = Math.max(targetrefs, maxtargetrefs); maxtargetrefs = Math.max(targetrefs, maxtargetrefs);
maxthisrefs = Math.max(thisrefs, maxthisrefs); maxthisrefs = Math.max(thisrefs, maxthisrefs);
targets.add(new String[] {targethash, targethost}); targets.add(new AbstractMap.SimpleEntry<String, String>(targethash, targethost));
if (graph.getNode(targethost) != null) continue; if (graph.getNode(targethost) != null) continue;
// set a new point. It is placed on a circle around the host point // set a new point. It is placed on a circle around the host point
final double angle = ((Base64Order.enhancedCoder.cardinal((targethash + "____").getBytes()) / maxlongd) + (cyc / 360.0d)) * 2.0d * Math.PI; final double angle = ((Base64Order.enhancedCoder.cardinal((targethash + "____").getBytes()) / maxlongd) + (cyc / 360.0d)) * 2.0d * Math.PI;
//System.out.println("ANGLE = " + angle); //System.out.println("ANGLE = " + angle);
rr = radius * 0.25 * (1 - targetrefs / (double) maxtargetrefs); rr = radius * 0.25 * (1 - targetrefs / (double) maxtargetrefs);
re = radius * 0.5 * (thisrefs / (double) maxthisrefs); re = radius * 0.5 * (thisrefs / (double) maxthisrefs);
graph.addNode(targethost, x + (radius - rr - re) * Math.cos(angle), y + (radius - rr - re) * Math.sin(angle), nextlayer); graph.addNode(targethost, pivotpoint.x + (radius - rr - re) * Math.cos(angle), pivotpoint.y + (radius - rr - re) * Math.sin(angle), nextlayer);
mynodes++; branches++;
if (maxnodes-- <= 0 || System.currentTimeMillis() >= timeout) break; if (maxnodes-- <= 0 || (bf > 0 && branches >= bf) || System.currentTimeMillis() >= timeout) break;
} }
// recursively set next hosts // recursively set next hosts
int nextnodes; int nextnodes;
for (String[] target: targets) { for (Map.Entry<String, String> target: targets) {
targethash = target[0]; nextnodes = ((maxnodes <= 0) || (System.currentTimeMillis() >= timeout)) ? 0 : place(graph, structure, target, bf, maxnodes, timeout, nextlayer, maxlayer, cyc);
targethost = target[1]; branches += nextnodes;
final GraphPlotter.Point c = graph.getNode(targethost);
assert c != null;
nextnodes = ((maxnodes <= 0) || (System.currentTimeMillis() >= timeout)) ? 0 : place(graph, structure, targethash, targethost, maxnodes, timeout, c.x, c.y, nextlayer, maxlayer, cyc);
mynodes += nextnodes;
maxnodes -= nextnodes; maxnodes -= nextnodes;
graph.setEdge(centerhost, targethost); graph.setEdge(pivotnode.getValue(), target.getValue());
} }
return mynodes; return branches;
} }
} }

@ -185,22 +185,18 @@ public class WebStructureGraph {
} }
private static int refstr2count(final String refs) { private static int refstr2count(final String refs) {
if ( (refs == null) || (refs.length() <= 8) ) { if (refs == null || refs.length() <= 8) return 0;
return 0;
}
assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length(); assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length();
return (refs.length() - 8) / 10; return (refs.length() - 8) / 10;
} }
private static Map<String, Integer> refstr2map(final String refs) { private static Map<String, Integer> refstr2map(final String refs) {
if ( (refs == null) || (refs.length() <= 8) ) { if (refs == null || refs.length() <= 8) return new HashMap<String, Integer>();
return new HashMap<String, Integer>();
}
final Map<String, Integer> map = new HashMap<String, Integer>(); final Map<String, Integer> map = new HashMap<String, Integer>();
String c; String c;
final int refsc = refstr2count(refs); final int refsc = refstr2count(refs);
int d; int d;
for ( int i = 0; i < refsc; i++ ) { for (int i = 0; i < refsc; i++) {
c = refs.substring(8 + i * 10, 8 + (i + 1) * 10); c = refs.substring(8 + i * 10, 8 + (i + 1) * 10);
try { try {
d = Integer.valueOf(c.substring(6), 16); d = Integer.valueOf(c.substring(6), 16);
@ -302,9 +298,7 @@ public class WebStructureGraph {
} }
} }
} }
if ( h.isEmpty() ) { if (h.isEmpty()) return null;
return null;
}
return new StructureEntry(hosthash, hostname, date, h); return new StructureEntry(hosthash, hostname, date, h);
} }
@ -512,9 +506,7 @@ public class WebStructureGraph {
public int referencesCount(final String hosthash) { public int referencesCount(final String hosthash) {
// returns the number of hosts that are referenced by this hosthash // returns the number of hosts that are referenced by this hosthash
assert hosthash.length() == 6 : "hosthash = " + hosthash; assert hosthash.length() == 6 : "hosthash = " + hosthash;
if ( hosthash == null || hosthash.length() != 6 ) { if (hosthash == null || hosthash.length() != 6) return 0;
return 0;
}
SortedMap<String, byte[]> tailMap; SortedMap<String, byte[]> tailMap;
int c = 0; int c = 0;
synchronized ( this.structure_old ) { synchronized ( this.structure_old ) {

@ -169,8 +169,7 @@ public class GraphPlotter implements Cloneable {
} }
public Point addNode(final String node, Point p) { public Point addNode(final String node, Point p) {
final Point p0 = this.nodes.put(node, p); this.nodes.put(node, p);
assert p0 == null; // all add shall be unique
if (p.x > this.rightmost) this.rightmost = p.x; if (p.x > this.rightmost) this.rightmost = p.x;
if (p.x < this.leftmost) this.leftmost = p.x; if (p.x < this.leftmost) this.leftmost = p.x;
if (p.y > this.topmost) this.topmost = p.y; if (p.y > this.topmost) this.topmost = p.y;

Loading…
Cancel
Save