// //$LastChangedDate$ //$LastChangedRevision$ //$LastChangedBy$ // import java.util.Iterator; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.sorting.ReversibleScoreMap; import net.yacy.crawler.CrawlSwitchboard; import net.yacy.crawler.data.CrawlProfile; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.search.Switchboard; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; public class WatchWebStructure_p { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); String color_text = "888888"; String color_back = "FFFFFF"; String color_dot0 = "1111BB"; String color_dota = "11BB11"; String color_line = "222222"; String color_lineend = "333333"; int width = 1280; int height = 720; int depth = 3; int nodes = 300; // maximum number of host nodes that are painted int time = -1; String host = "auto"; String besthost; if (post != null) { width = post.getInt("width", 1280); height = post.getInt("height", 720); depth = post.getInt("depth", 3); nodes = post.getInt("nodes", width * height * 300 / width / height); time = post.getInt("time", -1); host = post.get("host", "auto"); color_text = post.get("colortext", color_text); color_back = post.get("colorback", color_back); color_dot0 = post.get("colordot0", color_dot0); color_dota = post.get("colordota", color_dota); color_line = post.get("colorline", color_line); color_lineend = post.get("colorlineend", color_lineend); } if (host.equals("auto")) { // try to find the host from the crawl profiles CrawlProfile e; for (final byte[] handle: sb.crawler.getActive()) { e = sb.crawler.getActive(handle); if (e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_PROXY) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_REMOTE) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) || e.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE)) continue; host = e.name(); break; // take the first one } } // fix start point if a "www."-prefix would be better if (host != null && !host.startsWith("www")) { if (sb.webStructure.referencesCount(DigestURI.hosthash6("www." + host)) > sb.webStructure.referencesCount(DigestURI.hosthash6(host))) { host = "www." + host; } } if (post != null && post.containsKey("hosts")) { int maxcount = 200; ReversibleScoreMap score = sb.webStructure.hostReferenceScore(); int c = 0; Iterator i = score.keys(false); String h; while (i.hasNext() && c < maxcount) { h = i.next(); prop.put("hosts_list_" + c + "_host", h); prop.put("hosts_list_" + c + "_count", score.get(h)); c++; } prop.put("hosts_list", c); prop.put("hosts", 1); } // find start point if (host == null || host.isEmpty() || host.equals("auto") // || sb.webStructure.referencesCount(DigestURI.hosthash6(host)) == 0 ) { // find domain with most references besthost = sb.webStructure.hostWithMaxReferences(); if (besthost == null) besthost = host; } else { besthost = host; } prop.putHTML("host", host); prop.putHTML("besthost", besthost); prop.put("depth", depth); prop.put("depthi", Math.min(8, depth + 1)); prop.put("depthd", Math.max(0, depth - 1)); prop.put("nodes", nodes); prop.put("nodesi", Math.min(1000, nodes + 100)); prop.put("nodesd", Math.max(100, nodes - 100)); prop.put("time", time); prop.put("timei", (time > 9000) ? -1 : ((time < 0) ? -1 : Math.min(9999, time + 1000))); prop.put("timed", (time < 0) ? 9000 : Math.max(1000, time - 1000)); prop.put("width", width); prop.put("height", height); prop.put("colortext", color_text); prop.put("colorback", color_back); prop.put("colordot0", color_dot0); prop.put("colordota", color_dota); prop.put("colorline", color_line); prop.put("colorlineend", color_lineend); return prop; } }