diff --git a/htroot/Crawler_p.html b/htroot/Crawler_p.html
index 3b06b914c..ed9e7844a 100644
--- a/htroot/Crawler_p.html
+++ b/htroot/Crawler_p.html
@@ -178,6 +178,26 @@
#{/list}#
+#(linkstructure)#::
+
+
+
+
+#(/linkstructure)#
Crawled Pages
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index eb62823d5..dd607bfa8 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -171,7 +171,7 @@ public class Crawler_p {
try {
DigestURI crawlingStartURL = new DigestURI(crawlingStart);
rootURLs.add(crawlingStartURL);
- crawlName += crawlingStartURL.getHost() + "_";
+ crawlName += crawlingStartURL.getHost() + ',';
if (crawlingStartURL != null && (crawlingStartURL.isFile() || crawlingStartURL.isSMB())) storeHTCache = false;
} catch (MalformedURLException e) {
@@ -180,8 +180,11 @@ public class Crawler_p {
} else {
crawlName = crawlingFile.getName();
}
- if (crawlName.length() > 80) crawlName = crawlName.substring(0, 80);
- if (crawlName.endsWith("_")) crawlName = crawlName.substring(0, crawlName.length() - 1);
+ if (crawlName.length() > 256) {
+ int p = crawlName.lastIndexOf(',');
+ if (p >= 8) crawlName = crawlName.substring(0, p);
+ }
+ if (crawlName.endsWith(",")) crawlName = crawlName.substring(0, crawlName.length() - 1);
// set the crawl filter
@@ -515,16 +518,41 @@ public class Crawler_p {
final int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
CrawlProfile profile;
// put active crawls into list
+ String hosts = "";
for (final byte[] h: sb.crawler.getActive()) {
profile = sb.crawler.getActive(h);
if (CrawlProfile.ignoreNames.contains(profile.name())) continue;
profile.putProfileEntry("crawlProfilesShow_list_", prop, true, dark, count, domlistlength);
+ if (profile.urlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN) {
+ hosts = hosts + "," + profile.name();
+ }
dark = !dark;
count++;
}
prop.put("crawlProfilesShow_list", count);
prop.put("crawlProfilesShow", count == 0 ? 0 : 1);
+ if (count > 0) {
+ // collect the host names for 'wide' crawls which can be visualized
+ boolean showLinkstructure = hosts.length() > 0;
+ /*
+ // check if there is actually something to see
+ if (showLinkstructure) {
+ showLinkstructure = false;
+ for (String host: hosts.substring(1).split(",")) {
+ String hash = null;
+ try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
+ if (hash != null && sb.webStructure.referencesCount(hash) > 0) {showLinkstructure = true; break;}
+ }
+ }
+ */
+ if (showLinkstructure) {
+ prop.put("crawlProfilesShow_linkstructure", 1);
+ prop.put("crawlProfilesShow_linkstructure_hosts", hosts.substring(1));
+ } else {
+ prop.put("crawlProfilesShow_linkstructure", 0);
+ }
+ }
// return rewrite properties
return prop;
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index 10e436a1a..894427fce 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -133,7 +133,7 @@ public class QuickCrawlLink_p {
CrawlProfile pe = null;
try {
pe = new CrawlProfile(
- crawlingStartURL.toNormalform(true),
+ (crawlingStartURL.getHost() == null) ? crawlingStartURL.toNormalform(true) : crawlingStartURL.getHost(),
crawlingMustMatch, //crawlerUrlMustMatch
crawlingMustNotMatch, //crawlerUrlMustNotMatch
CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
diff --git a/htroot/WebStructurePicture_p.java b/htroot/WebStructurePicture_p.java
index 1d703049d..f48212fe7 100644
--- a/htroot/WebStructurePicture_p.java
+++ b/htroot/WebStructurePicture_p.java
@@ -116,7 +116,7 @@ public class WebStructurePicture_p {
String hash = null;
try {hash = ASCII.String((new DigestURI("http://" + host)).hash(), 6, 6);} catch (final MalformedURLException e) {Log.logException(e);}
Map.Entry centernode = new AbstractMap.SimpleEntry(hash, host);
- double angle = 2.0d * i * Math.PI / hostlist.length - Math.PI / hostlist.length;
+ double angle = 2.0d * i * Math.PI / hostlist.length;
if (hostlist.length == 3) angle -= Math.PI / 2;
if (hostlist.length == 4) angle += Math.PI / 4;
graph.addNode(centernode.getValue(), Math.cos(angle) / 8, Math.sin(angle) / 8, 0);
@@ -128,7 +128,8 @@ public class WebStructurePicture_p {
// test with: http://localhost:8090/WebStructurePicture_p.png?pa=1&ral=0.7&raa=0.5&rar=2&rel=0.5&rea=1&rer=2
GraphPlotter.Ribbon rAll = new GraphPlotter.Ribbon(post.getFloat("ral", 0.1f), post.getFloat("raa", 0.1f), post.getFloat("rar", 0.1f));
GraphPlotter.Ribbon rEdge = new GraphPlotter.Ribbon(post.getFloat("rel", 0.05f), post.getFloat("rea", 0.1f), post.getFloat("rer", 0.1f));
- for (int i = 0; i < post.getInt("pa", 1); i++) graph = graph.physics(rAll, rEdge);
+ int pa = post.getInt("pa", 0);
+ for (int i = 0; i < pa; i++) graph = graph.physics(rAll, rEdge);
}
// draw the graph
diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java
index dfa668c95..bad4a153b 100644
--- a/source/net/yacy/crawler/retrieval/HTTPLoader.java
+++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java
@@ -156,6 +156,8 @@ public final class HTTPLoader {
this.log.logInfo("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + requestURLString);
this.log.logInfo("CRAWLER ..Redirecting request to: " + redirectionUrl);
+ this.sb.webStructure.generateCitationReference(url, redirectionUrl);
+
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
}
diff --git a/source/net/yacy/peers/graphics/WebStructureGraph.java b/source/net/yacy/peers/graphics/WebStructureGraph.java
index d7bea501c..49e0bcb4f 100644
--- a/source/net/yacy/peers/graphics/WebStructureGraph.java
+++ b/source/net/yacy/peers/graphics/WebStructureGraph.java
@@ -171,9 +171,27 @@ public class WebStructureGraph {
}
}
final LearnObject lro = new LearnObject(url, globalRefURLs);
+ if (!globalRefURLs.isEmpty()) {
+ try {
+ if (this.publicRefDNSResolvingWorker.isAlive()) {
+ this.publicRefDNSResolvingQueue.put(lro);
+ } else {
+ learnrefs(lro);
+ }
+ } catch ( final InterruptedException e ) {
+ learnrefs(lro);
+ }
+ }
+ }
+
+ public void generateCitationReference(final DigestURI from, final DigestURI to) {
+ final HashSet globalRefURLs = new HashSet();
+ final String refhost = from.getHost();
+ if (refhost != null && to.getHost() != null && !to.getHost().equals(refhost)) globalRefURLs.add(to);
+ final LearnObject lro = new LearnObject(from, globalRefURLs);
if ( !globalRefURLs.isEmpty() ) {
try {
- if ( this.publicRefDNSResolvingWorker.isAlive() ) {
+ if (this.publicRefDNSResolvingWorker.isAlive()) {
this.publicRefDNSResolvingQueue.put(lro);
} else {
learnrefs(lro);
diff --git a/source/net/yacy/visualization/GraphPlotter.java b/source/net/yacy/visualization/GraphPlotter.java
index 592c59494..39ea0c22d 100644
--- a/source/net/yacy/visualization/GraphPlotter.java
+++ b/source/net/yacy/visualization/GraphPlotter.java
@@ -182,7 +182,7 @@ public class GraphPlotter implements Cloneable {
}
public boolean hasEdge(final String fromNode, final String toNode) {
- return this.edges.contains(fromNode + "-" + toNode);
+ return this.edges.contains(fromNode + '$' + toNode); // key must use the same '$' separator that setEdge stores and getEdges scans for
}
public void setEdge(final String fromNode, final String toNode) {
@@ -190,18 +190,18 @@ public class GraphPlotter implements Cloneable {
final Point to = this.nodes.get(toNode);
assert from != null;
assert to != null;
- this.edges.add(fromNode + "$" + toNode);
+ this.edges.add(fromNode + '$' + toNode);
}
public Collection getEdges(final String node, boolean start) {
Collection c = new ArrayList();
if (start) {
- String s = node + "$";
+ String s = node + '$';
for (String e: this.edges) {
if (e.startsWith(s)) c.add(e.substring(s.length()));
}
} else {
- String s = "$" + node;
+ String s = '$' + node;
for (String e: this.edges) {
if (e.endsWith(s)) c.add(e.substring(0, e.length() - s.length()));
}
@@ -219,7 +219,7 @@ public class GraphPlotter implements Cloneable {
entry = i.next();
name = entry.getKey();
c = entry.getValue();
- System.out.println("point(" + c.x + ", " + c.y + ", " + c.layer + ") [" + name + "]");
+ System.out.println("point(" + c.x + ", " + c.y + ", " + c.layer + ") [" + name + ']');
}
final Iterator j = this.edges.iterator();
while (j.hasNext()) {