enhanced webstructure image: introduced

- multiple hosts can be listed (comma-separated) as host argument
- new 'bf' attribute (branch factor): the maximum number of edges per
node
- the bf value is computed automatically if it is not given
- ordering of nodes when the graphic is drawn: mostly the drawing ends
because of a limitation, e.g. the number of nodes. When this happens, it should be
ensured that the more 'interesting' nodes are painted first. This is
now done by sorting all nodes by the number of links they have in the
distant sub-graph.
pull/1/head
Michael Peter Christen 12 years ago
parent 47ae7e322e
commit 39317a6c66

@ -25,15 +25,18 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.net.MalformedURLException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.graphics.WebStructureGraph;
@ -41,6 +44,7 @@ import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.visualization.GraphPlotter;
import net.yacy.visualization.GraphPlotter.Point;
import net.yacy.visualization.PrintTool;
import net.yacy.visualization.RasterPlotter;
@ -61,60 +65,63 @@ public class WebStructurePicture_p {
int height = 576;
int depth = 3;
int nodes = 300; // maximum number of host nodes that are painted
int bf = 12; // maximum number of branches around nodes; less nodes makes the graphic look more structured
int time = -1;
String host = null;
String hosts = null;
int cyc = 0;
if (post != null) {
width = post.getInt("width", 1024);
if (width < 32 ) width = 32;
if (width > 10000) width = 10000;
height = post.getInt("height", 576);
if (height < 24) height = 24;
if (height > 10000) height = 10000;
depth = post.getInt("depth", 3);
if (depth > 8) depth = 8;
if (depth < 0) depth = 0;
nodes = post.getInt("nodes", width * height * 100 / 1024 / 576);
bf = post.getInt("bf", depth <= 0 ? -1 : (int) Math.round(2.0d * Math.pow(nodes, 1.0d / depth)));
time = post.getInt("time", -1);
host = post.get("host", null);
hosts = post.get("host", null);
color_text = post.get("colortext", color_text);
color_back = post.get("colorback", color_back);
color_dot = post.get("colordot", color_dot);
color_line = post.get("colorline", color_line);
color_lineend = post.get("colorlineend", color_lineend);
cyc = post.getInt("cyc", 0);
cyc = post.getInt("cyc", 0);
}
// too small values lead to an error, too big to huge CPU/memory consumption, resulting in possible DOS.
if (width < 32 ) width = 32;
if (width > 10000) width = 10000;
if (height < 24) height = 24;
if (height > 10000) height = 10000;
if (depth > 8) depth = 8;
if (depth < 0) depth = 0;
// calculate target time
final long timeout = (time < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + (time * 8 / 10);
// find start point
if ((host == null) || (host.isEmpty()) || (host.equals("auto"))) {
if (hosts == null || hosts.isEmpty() || hosts.equals("auto")) {
// find domain with most references
host = sb.webStructure.hostWithMaxReferences();
hosts = sb.webStructure.hostWithMaxReferences();
}
final RasterPlotter graphPicture;
if (host == null) {
if (hosts == null) {
// probably no information available
final RasterPlotter.DrawMode drawMode = (RasterPlotter.darkColor(color_back)) ? RasterPlotter.DrawMode.MODE_ADD : RasterPlotter.DrawMode.MODE_SUB;
graphPicture = new RasterPlotter(width, height, drawMode, color_back);
PrintTool.print(graphPicture, width / 2, height / 2, 0, "NO WEB STRUCTURE DATA AVAILABLE.", 0);
PrintTool.print(graphPicture, width / 2, height / 2 + 16, 0, "START A WEB CRAWL TO OBTAIN STRUCTURE DATA.", 0);
} else {
// find start hash
String hash = null;
if (host != null && host.length() > 0) try {
hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);
} catch (final MalformedURLException e) {Log.logException(e);}
//assert (sb.webStructure.outgoingReferences(hash) != null);
// recursively find domains, up to a specific depth
GraphPlotter graph = new GraphPlotter();
if (host != null && hash != null) place(graph, sb.webStructure, hash, host, nodes, timeout, 0.0, 0.0, 0, depth, cyc);
//graph.print();
String[] hostlist = hosts.split(",");
for (int i = 0; i < hostlist.length; i++) {
String host = hostlist[i];
String hash = null;
try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
Map.Entry<String, String> centernode = new AbstractMap.SimpleEntry<String, String>(hash, host);
double angle = 2.0d * i * Math.PI / hostlist.length - Math.PI / hostlist.length;
if (hostlist.length == 3) angle -= Math.PI / 2;
if (hostlist.length == 4) angle += Math.PI / 4;
graph.addNode(centernode.getValue(), Math.cos(angle) / 8, Math.sin(angle) / 8, 0);
place(graph, sb.webStructure, centernode, bf, nodes, timeout, hostlist.length == 1 ? 0 : 1, hostlist.length == 1 ? depth : depth + 1, cyc);
}
// apply physics to it to get a better shape
if (post != null && post.containsKey("pa")) {
@ -130,69 +137,57 @@ public class WebStructurePicture_p {
// print headline
graphPicture.setColor(color_text);
PrintTool.print(graphPicture, 2, 8, 0, "YACY WEB-STRUCTURE ANALYSIS", -1);
if (host != null) PrintTool.print(graphPicture, 2, 16, 0, "LINK ENVIRONMENT OF DOMAIN " + host.toUpperCase(), -1);
if (hosts != null) PrintTool.print(graphPicture, 2, 16, 0, "LINK ENVIRONMENT OF DOMAIN " + hosts.toUpperCase(), -1);
PrintTool.print(graphPicture, width - 2, 8, 0, "SNAPSHOT FROM " + new Date().toString().toUpperCase(), 1);
return graphPicture;
}
private static final int place(
final GraphPlotter graph, final WebStructureGraph structure, final String centerhash, final String centerhost,
int maxnodes, final long timeout, final double x, final double y, int nextlayer, final int maxlayer,
final int cyc) {
// returns the number of nodes that had been placed
assert centerhost != null;
final GraphPlotter.Point center = graph.getNode(centerhost);
int mynodes = 0;
if (center == null) {
graph.addNode(centerhost, x, y, nextlayer);
maxnodes--;
mynodes++;
}
if (nextlayer == maxlayer) return mynodes;
final GraphPlotter graph, final WebStructureGraph structure, Map.Entry<String, String> pivotnode,
int bf, int maxnodes, final long timeout, int nextlayer, final int maxlayer, final int cyc) {
Point pivotpoint = graph.getNode(pivotnode.getValue());
int branches = 0;
if (nextlayer == maxlayer) return branches;
nextlayer++;
final double radius = 1.0 / (1 << nextlayer);
final WebStructureGraph.StructureEntry sr = structure.outgoingReferences(centerhash);
final WebStructureGraph.StructureEntry sr = structure.outgoingReferences(pivotnode.getKey());
final Map<String, Integer> next = (sr == null) ? new HashMap<String, Integer>() : sr.references;
String targethash, targethost;
ClusteredScoreMap<String> next0 = new ClusteredScoreMap<String>();
for (Map.Entry<String, Integer> entry: next.entrySet()) next0.set(entry.getKey(), entry.getValue());
// first set points to next hosts
final List<String[]> targets = new ArrayList<String[]>();
final List<Map.Entry<String, String>> targets = new ArrayList<Map.Entry<String, String>>();
int maxtargetrefs = 8, maxthisrefs = 8;
int targetrefs, thisrefs;
double rr, re;
for (Map.Entry<String, Integer> entry: next.entrySet()) {
targethash = entry.getKey();
targethost = structure.hostHash2hostName(targethash);
Iterator<String> i = next0.keys(false);
while (i.hasNext()) {
String targethash = i.next();
String targethost = structure.hostHash2hostName(targethash);
if (targethost == null) continue;
thisrefs = entry.getValue().intValue();
thisrefs = next.get(targethash).intValue();
targetrefs = structure.referencesCount(targethash); // can be cpu/time-critical
maxtargetrefs = Math.max(targetrefs, maxtargetrefs);
maxthisrefs = Math.max(thisrefs, maxthisrefs);
targets.add(new String[] {targethash, targethost});
targets.add(new AbstractMap.SimpleEntry<String, String>(targethash, targethost));
if (graph.getNode(targethost) != null) continue;
// set a new point. It is placed on a circle around the host point
final double angle = ((Base64Order.enhancedCoder.cardinal((targethash + "____").getBytes()) / maxlongd) + (cyc / 360.0d)) * 2.0d * Math.PI;
//System.out.println("ANGLE = " + angle);
rr = radius * 0.25 * (1 - targetrefs / (double) maxtargetrefs);
re = radius * 0.5 * (thisrefs / (double) maxthisrefs);
graph.addNode(targethost, x + (radius - rr - re) * Math.cos(angle), y + (radius - rr - re) * Math.sin(angle), nextlayer);
mynodes++;
if (maxnodes-- <= 0 || System.currentTimeMillis() >= timeout) break;
graph.addNode(targethost, pivotpoint.x + (radius - rr - re) * Math.cos(angle), pivotpoint.y + (radius - rr - re) * Math.sin(angle), nextlayer);
branches++;
if (maxnodes-- <= 0 || (bf > 0 && branches >= bf) || System.currentTimeMillis() >= timeout) break;
}
// recursively set next hosts
int nextnodes;
for (String[] target: targets) {
targethash = target[0];
targethost = target[1];
final GraphPlotter.Point c = graph.getNode(targethost);
assert c != null;
nextnodes = ((maxnodes <= 0) || (System.currentTimeMillis() >= timeout)) ? 0 : place(graph, structure, targethash, targethost, maxnodes, timeout, c.x, c.y, nextlayer, maxlayer, cyc);
mynodes += nextnodes;
for (Map.Entry<String, String> target: targets) {
nextnodes = ((maxnodes <= 0) || (System.currentTimeMillis() >= timeout)) ? 0 : place(graph, structure, target, bf, maxnodes, timeout, nextlayer, maxlayer, cyc);
branches += nextnodes;
maxnodes -= nextnodes;
graph.setEdge(centerhost, targethost);
graph.setEdge(pivotnode.getValue(), target.getValue());
}
return mynodes;
return branches;
}
}
}

@ -185,22 +185,18 @@ public class WebStructureGraph {
}
/**
 * Counts the reference entries encoded in a reference string.
 * The string is treated as an 8-character prefix followed by entries of
 * 10 characters each.
 *
 * @param refs the encoded reference string; may be null
 * @return the number of 10-character entries after the prefix, or 0 if
 *         refs is null or has no more than 8 characters
 */
private static int refstr2count(final String refs) {
// no entries can follow the prefix if the string is missing or not longer than 8 characters
if ( (refs == null) || (refs.length() <= 8) ) {
return 0;
}
// same guard again in single-line form; it can never be true once the check above has passed
if (refs == null || refs.length() <= 8) return 0;
// everything after the prefix must be a whole number of 10-character entries
assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length();
return (refs.length() - 8) / 10;
}
private static Map<String, Integer> refstr2map(final String refs) {
if ( (refs == null) || (refs.length() <= 8) ) {
return new HashMap<String, Integer>();
}
if (refs == null || refs.length() <= 8) return new HashMap<String, Integer>();
final Map<String, Integer> map = new HashMap<String, Integer>();
String c;
final int refsc = refstr2count(refs);
int d;
for ( int i = 0; i < refsc; i++ ) {
for (int i = 0; i < refsc; i++) {
c = refs.substring(8 + i * 10, 8 + (i + 1) * 10);
try {
d = Integer.valueOf(c.substring(6), 16);
@ -302,12 +298,10 @@ public class WebStructureGraph {
}
}
}
if ( h.isEmpty() ) {
return null;
}
if (h.isEmpty()) return null;
return new StructureEntry(hosthash, hostname, date, h);
}
public StructureEntry incomingReferences(final String hosthash) {
final String hostname = hostHash2hostName(hosthash);
if ( hostname == null ) {
@ -512,9 +506,7 @@ public class WebStructureGraph {
public int referencesCount(final String hosthash) {
// returns the number of hosts that are referenced by this hosthash
assert hosthash.length() == 6 : "hosthash = " + hosthash;
if ( hosthash == null || hosthash.length() != 6 ) {
return 0;
}
if (hosthash == null || hosthash.length() != 6) return 0;
SortedMap<String, byte[]> tailMap;
int c = 0;
synchronized ( this.structure_old ) {

@ -169,8 +169,7 @@ public class GraphPlotter implements Cloneable {
}
public Point addNode(final String node, Point p) {
final Point p0 = this.nodes.put(node, p);
assert p0 == null; // all add shall be unique
this.nodes.put(node, p);
if (p.x > this.rightmost) this.rightmost = p.x;
if (p.x < this.leftmost) this.leftmost = p.x;
if (p.y > this.topmost) this.topmost = p.y;

Loading…
Cancel
Save