showing the web structure graph as animation in the crawl monitor

pull/1/head
Michael Peter Christen 12 years ago
parent 39317a6c66
commit ae6feb5610

@ -178,6 +178,26 @@
</tr>
#{/list}#
</table>
#(linkstructure)#::
<script type="text/javascript">
<!--
imagestub = "WebStructurePicture_p.png?host=#[hosts]#&depth=3&width=1024&height=576&nodes=300&time=1000&colortext=888888&colorback=FFFFFF&colordot=11BB11&colorline=222222&colorlineend=333333";
idx = 0;
setTimeout("doanimation()", 2000);
/**
 * Advances the web structure animation by one frame.
 *
 * Looks up the image element with id "WebPicture"; if it exists, increments
 * the global frame counter `idx`, points the image at `imagestub` with the
 * new counter appended as the `idx` query parameter, and schedules the next
 * frame one second later. If the element is not present in the page, nothing
 * is changed and no further frame is scheduled, so the animation stops.
 *
 * Relies on the globals `imagestub` and `idx` being set before the first call.
 */
function doanimation() {
    var accessPicture = document.getElementById("WebPicture");
    if (accessPicture != null) {
        idx++;
        // the changing idx value yields a different image URL for every frame
        accessPicture.src = imagestub + "&idx=" + idx;
        // hand over the function itself instead of a code string, so the
        // timer does not have to evaluate source text on every tick
        setTimeout(doanimation, 1000);
    }
}
-->
</script>
<div style="clear:both; text-align:left;">
<img id="WebPicture" src="/env/grafics/invisible.png"/>
</div>
#(/linkstructure)#
<h3>Crawled Pages</h3>
<p id="crawllist"></p>
</fieldset>

@ -171,7 +171,7 @@ public class Crawler_p {
try {
DigestURI crawlingStartURL = new DigestURI(crawlingStart);
rootURLs.add(crawlingStartURL);
crawlName += crawlingStartURL.getHost() + "_";
crawlName += crawlingStartURL.getHost() + ',';
if (crawlingStartURL != null && (crawlingStartURL.isFile() || crawlingStartURL.isSMB())) storeHTCache = false;
} catch (MalformedURLException e) {
@ -180,8 +180,11 @@ public class Crawler_p {
} else {
crawlName = crawlingFile.getName();
}
if (crawlName.length() > 80) crawlName = crawlName.substring(0, 80);
if (crawlName.endsWith("_")) crawlName = crawlName.substring(0, crawlName.length() - 1);
if (crawlName.length() > 256) {
int p = crawlName.lastIndexOf(',');
if (p >= 8) crawlName = crawlName.substring(0, p);
}
if (crawlName.endsWith(",")) crawlName = crawlName.substring(0, crawlName.length() - 1);
// set the crawl filter
@ -515,16 +518,41 @@ public class Crawler_p {
final int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
CrawlProfile profile;
// put active crawls into list
String hosts = "";
for (final byte[] h: sb.crawler.getActive()) {
profile = sb.crawler.getActive(h);
if (CrawlProfile.ignoreNames.contains(profile.name())) continue;
profile.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength);
if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) {
hosts = hosts + "," + profile.name();
}
dark = !dark;
count++;
}
prop.put("crawlProfilesShow_list", count);
prop.put("crawlProfilesShow", count == 0 ? 0 : 1);
if (count > 0) {
// the link structure can only be visualized if the loop above collected at least one host name from a 'wide' (match-all) crawl
boolean showLinkstructure = hosts.length() > 0;
/*
// check if there is actually something to see
if (showLinkstructure) {
showLinkstructure = false;
for (String host: hosts.substring(1).split(",")) {
String hash = null;
try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
if (hash != null && sb.webStructure.referencesCount(hash) > 0) {showLinkstructure = true; break;}
}
}
*/
if (showLinkstructure) {
prop.put("crawlProfilesShow_linkstructure", 1);
prop.put("crawlProfilesShow_linkstructure_hosts", hosts.substring(1));
} else {
prop.put("crawlProfilesShow_linkstructure", 0);
}
}
// return rewrite properties
return prop;

@ -133,7 +133,7 @@ public class QuickCrawlLink_p {
CrawlProfile pe = null;
try {
pe = new CrawlProfile(
crawlingStartURL.toNormalform(true),
(crawlingStartURL.getHost() == null) ? crawlingStartURL.toNormalform(true) : crawlingStartURL.getHost(),
crawlingMustMatch, //crawlerUrlMustMatch
crawlingMustNotMatch, //crawlerUrlMustNotMatch
CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch

@ -116,7 +116,7 @@ public class WebStructurePicture_p {
String hash = null;
try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
Map.Entry<String, String> centernode = new AbstractMap.SimpleEntry<String, String>(hash, host);
double angle = 2.0d * i * Math.PI / hostlist.length - Math.PI / hostlist.length;
double angle = 2.0d * i * Math.PI / hostlist.length;
if (hostlist.length == 3) angle -= Math.PI / 2;
if (hostlist.length == 4) angle += Math.PI / 4;
graph.addNode(centernode.getValue(), Math.cos(angle) / 8, Math.sin(angle) / 8, 0);
@ -128,7 +128,8 @@ public class WebStructurePicture_p {
// test with: http://localhost:8090/WebStructurePicture_p.png?pa=1&ral=0.7&raa=0.5&rar=2&rel=0.5&rea=1&rer=2
GraphPlotter.Ribbon rAll = new GraphPlotter.Ribbon(post.getFloat("ral", 0.1f), post.getFloat("raa", 0.1f), post.getFloat("rar", 0.1f));
GraphPlotter.Ribbon rEdge = new GraphPlotter.Ribbon(post.getFloat("rel", 0.05f), post.getFloat("rea", 0.1f), post.getFloat("rer", 0.1f));
for (int i = 0; i < post.getInt("pa", 1); i++) graph = graph.physics(rAll, rEdge);
int pa = post.getInt("pa", 0);
for (int i = 0; i < pa; i++) graph = graph.physics(rAll, rEdge);
}
// draw the graph

@ -156,6 +156,8 @@ public final class HTTPLoader {
this.log.logInfo("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + requestURLString);
this.log.logInfo("CRAWLER ..Redirecting request to: " + redirectionUrl);
this.sb.webStructure.generateCitationReference(url, redirectionUrl);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
}

@ -171,9 +171,27 @@ public class WebStructureGraph {
}
}
final LearnObject lro = new LearnObject(url, globalRefURLs);
if (!globalRefURLs.isEmpty()) {
try {
if (this.publicRefDNSResolvingWorker.isAlive()) {
this.publicRefDNSResolvingQueue.put(lro);
} else {
learnrefs(lro);
}
} catch ( final InterruptedException e ) {
learnrefs(lro);
}
}
}
public void generateCitationReference(final DigestURI from, final DigestURI to) {
final HashSet<MultiProtocolURI> globalRefURLs = new HashSet<MultiProtocolURI>();
final String refhost = from.getHost();
if (refhost != null && to.getHost() != null && !to.getHost().equals(refhost)) globalRefURLs.add(to);
final LearnObject lro = new LearnObject(from, globalRefURLs);
if ( !globalRefURLs.isEmpty() ) {
try {
if ( this.publicRefDNSResolvingWorker.isAlive() ) {
if (this.publicRefDNSResolvingWorker.isAlive()) {
this.publicRefDNSResolvingQueue.put(lro);
} else {
learnrefs(lro);

@ -182,7 +182,7 @@ public class GraphPlotter implements Cloneable {
}
public boolean hasEdge(final String fromNode, final String toNode) {
return this.edges.contains(fromNode + "-" + toNode);
return this.edges.contains(fromNode + '-' + toNode);
}
public void setEdge(final String fromNode, final String toNode) {
@ -190,18 +190,18 @@ public class GraphPlotter implements Cloneable {
final Point to = this.nodes.get(toNode);
assert from != null;
assert to != null;
this.edges.add(fromNode + "$" + toNode);
this.edges.add(fromNode + '$' + toNode);
}
public Collection<String> getEdges(final String node, boolean start) {
Collection<String> c = new ArrayList<String>();
if (start) {
String s = node + "$";
String s = node + '$';
for (String e: this.edges) {
if (e.startsWith(s)) c.add(e.substring(s.length()));
}
} else {
String s = "$" + node;
String s = '$' + node;
for (String e: this.edges) {
if (e.endsWith(s)) c.add(e.substring(0, e.length() - s.length()));
}
@ -219,7 +219,7 @@ public class GraphPlotter implements Cloneable {
entry = i.next();
name = entry.getKey();
c = entry.getValue();
System.out.println("point(" + c.x + ", " + c.y + ", " + c.layer + ") [" + name + "]");
System.out.println("point(" + c.x + ", " + c.y + ", " + c.layer + ") [" + name + ']');
}
final Iterator<String> j = this.edges.iterator();
while (j.hasNext()) {

Loading…
Cancel
Save