enhanced the HostBrowser:

- also shows outbound links to other domains, if there are any
- the outbound links browser also shows the link structure image
- also shows inbound links, if the web structure graph has information
about them
- removed the left menu and made the HostBrowser a part of the top menu
for search
- moved the file search also to the top menu
- added hover information in the HostBrowser to explain what a click
on each link does
- because the HostBrowser also links to the metadata viewer ViewFile,
there should be a button to switch back to the HostBrowser: added that
as well.
pull/1/head
Michael Peter Christen 12 years ago
parent 21fe8339b4
commit cc98496ff3

@ -59,7 +59,7 @@ function updatepage(str) {
</script>
</head>
<body id="IndexControl">
#%env/templates/header.template%#
#%env/templates/simpleheader.template%#
<h2>Host Browser</h2>
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or select one of a <a href="/HostBrowser.html?hosts=">list of hosts.</a></p>
<form action="HostBrowser.html" id="searchform" method="get" onkeyup="xmlhttpPost(); return false;">
@ -77,7 +77,7 @@ function updatepage(str) {
<fieldset><legend>Host List</legend>
#{list}#
<div style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><a href="/HostBrowser.html?path=#[host]#">#[host]#</a></div>
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#">#[host]#</a><span>browse #[host]#</span></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</div>
#{/list}#
@ -103,7 +103,7 @@ function updatepage(str) {
#{list}#
#(type)#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td align="center"><a href="/ViewFile.html?url=#[file]#"><img src="/env/grafics/doc.gif"/></a></td>
<td align="center"><div id="info"><a href="/ViewFile.html?url=#[file]#"><img src="/env/grafics/doc.gif"/></a><span>Show Metadata</span></div></td>
<td align="left" nowrap class=#(stored)#"listingem"::"listing"#(/stored)#>#[file]#&nbsp;<a href="#[file]#"><img src="/env/grafics/link.gif"/></a></td>
#(stored)#
#(load)#::<td align="right" nowrap class="listingnok"><a href="/HostBrowser.html?load=#[file]#&path=#[path]#">load &amp; index</a>#(/load)#</td>::
@ -122,7 +122,32 @@ function updatepage(str) {
</table>
</fieldset>
#(/files)#
#(outbound)#::
<fieldset><legend>Outbound Links, outgoing from #[host]# - Host List</legend>
#{list}#
<div style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[link]#">#[host]#</a><span>browse #[host]#</span></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</div>
#{/list}#
<p style="clear:both"><br/>
<img src="/WebStructurePicture_p.png?host=#[host]#&depth=3&width=1024&height=576&nodes=300&time=1000&colortext=888888&colorback=FFFFFF&colordot=11BB11&colorline=222222&colorlineend=333333">
</p>
</fieldset>
#(/outbound)#
#(inbound)#::
<fieldset><legend>Inbound Links, incoming to #[host]# - Host List</legend>
#{list}#
<div style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#">#[host]#</a><span>browse #[host]#</span></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</div>
#{/list}#
</fieldset>
#(/inbound)#
#(admin)#::
<form action="HostBrowser.html" method="get">
<fieldset><legend>Administration Options</legend>

@ -20,7 +20,6 @@
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -32,13 +31,18 @@ import java.util.concurrent.BlockingQueue;
import org.apache.solr.common.SolrDocument;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.graphics.WebStructureGraph.StructureEntry;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Fulltext;
import net.yacy.search.index.SolrConfiguration;
@ -152,35 +156,49 @@ public class HostBrowser {
// generate file list from path
DigestURI uri = new DigestURI(path);
String host = uri.getHost();
prop.putHTML("outbound_host", host);
prop.putHTML("inbound_host", host);
String hosthash = ASCII.String(uri.hash(), 6, 12);
// get all files for a specific host from the index
BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(YaCySchema.host_s.name() + ":" + host, 0, 100000, 60000);
SolrDocument doc;
Set<String> storedDocs = new HashSet<String>();
Set<String> linkedDocs = new HashSet<String>();
Set<String> inboundLinks = new HashSet<String>();
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
int hostsize = 0;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String u = (String) doc.getFieldValue(YaCySchema.sku.name());
hostsize++;
if (u.startsWith(path)) storedDocs.add(u);
Collection<Object> urlstub = doc.getFieldValues(YaCySchema.inboundlinks_urlstub_txt.name());
Collection<String> urlprot = urlstub == null ? null : SolrConfiguration.indexedList2protocolList(doc.getFieldValues(YaCySchema.inboundlinks_protocol_sxt.name()), urlstub.size());
if (urlprot != null && urlstub != null) {
assert urlprot.size() == urlstub.size();
Object[] urlprota = urlprot.toArray();
Object[] urlstuba = urlstub.toArray();
for (int i = 0; i < urlprota.length; i++) {
u = ((String) urlprota[i]) + "://" + ((String) urlstuba[i]);
int hp = u.indexOf('#');
if (hp > 0) u = u.substring(0, hp);
if (u.startsWith(path) && !storedDocs.contains(u)) linkedDocs.add(u);
}
// collect inboundlinks to browse the host
Iterator<String> links = SolrConfiguration.getLinks(doc, true);
while (links.hasNext()) {
u = links.next();
if (u.startsWith(path) && !storedDocs.contains(u)) inboundLinks.add(u);
}
// collect outboundlinks to browse to the outbound
links = SolrConfiguration.getLinks(doc, false);
while (links.hasNext()) {
u = links.next();
try {
MultiProtocolURI mu = new MultiProtocolURI(u);
if (mu.getHost() != null) {
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
if (lks == null) {
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
outboundHosts.put(mu.getHost(), lks);
}
lks.set(u, u.length());
}
} catch (MalformedURLException e) {}
}
}
// now combine both lists into one
Map<String, Boolean> files = new HashMap<String, Boolean>();
for (String u: storedDocs) files.put(u, true);
for (String u: linkedDocs) if (!storedDocs.contains(u)) files.put(u, false);
for (String u: inboundLinks) if (!storedDocs.contains(u)) files.put(u, false);
// distinguish files and folders
Map<String, Object> list = new TreeMap<String, Object>();
@ -235,6 +253,47 @@ public class HostBrowser {
prop.put("files_hostsize", hostsize);
prop.put("files_subpathsize", storedDocs.size());
prop.put("files", 1);
// generate inbound-links table
StructureEntry struct = sb.webStructure.incomingReferences(hosthash);
if (struct != null && struct.references.size() > 0) {
maxcount = 200;
ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
for (Map.Entry<String, Integer> entry: struct.references.entrySet()) score.set(entry.getKey(), entry.getValue());
c = 0;
Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host));
prop.put("inbound_list_" + c + "_count", score.get(host));
c++;
}
prop.put("inbound_list", c);
prop.put("inbound", 1);
} else {
prop.put("inbound", 0);
}
// generate outbound-links table
if (outboundHosts.size() > 0) {
maxcount = 200;
ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
for (Map.Entry<String, ReversibleScoreMap<String>> entry: outboundHosts.entrySet()) score.set(entry.getKey(), entry.getValue().size());
c = 0;
Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("outbound_list_" + c + "_host", host);
prop.put("outbound_list_" + c + "_count", score.get(host));
prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey());
c++;
}
prop.put("outbound_list", c);
prop.put("outbound", 1);
} else {
prop.put("outbound", 0);
}
} catch (Throwable e) {
Log.logException(e);
}

@ -80,8 +80,9 @@ function updatepage(str) {
<dl>
<dt>URL:</dt>
<dd>
<input type="text" size="60" name="url" value="#[url]#" />
<input type="submit" name="show" value="Show" />
<input type="text" size="60" name="url" id="url" value="#[url]#" />
<input type="submit" name="show" value="Show Metadata" />
<input type="button" value="Browse Host" onClick="location.href='/HostBrowser.html?path=' + document.getElementById('url').value" />
<div id="searchresults"></div>
</dd>
</dl>

@ -9,7 +9,7 @@
<ul class="menu">
<li><a href="/index.html" accesskey="s" class="MenuItemLink">Web Search</a></li>
<li><a href="/yacyinteractive.html" class="MenuItemLink">File Search</a></li>
<li><a href="/HostBrowser.html" class="MenuItemLink">Host Browser</a></li>
<li><a href="/HostBrowser.html?hosts=" class="MenuItemLink">Host Browser</a></li>
<li><a href="/yacysearch_location.html" class="MenuItemLink">Location Search</a></li>
<li><a href="/solr/select?q=*:*&start=0&rows=3" class="MenuItemLink">Embedded Solr API</a></li>
<!--<li><a href="/yacy/ui/" accesskey="s" class="MenuItemLink">Rich Client Search</a></li>-->

@ -1,10 +1,12 @@
<div class="SubMenu">
<ul class="SubMenu">
<li style="width:20%;"><a href="/Status.html" class="MenuItemLink">Administration</a></li>
<li style="width:20%;"><a href="/index.html" onclick="this.href='/index.html?handover='+document.searchform.search.value" class="MenuItemLink">Web Search</a></li>
<li style="width:20%;"><a href="/Network.html?menu=simple" class="MenuItemLink">Search Network</a></li>
<li style="width:20%;"><a href="/ViewProfile.html?hash=localhash" class="MenuItemLink">Peer Owner Profile</a></li>
<li style="width:19%;"><a href="http://www.yacy-websearch.net/wiki/" class="MenuItemLink">Help / YaCy Wiki</a></li>
<li style="width:15%;"><a href="/Status.html" class="MenuItemLink">Administration</a></li>
<li style="width:15%;"><a href="/index.html" onclick="this.href='/index.html?handover='+document.searchform.search.value" class="MenuItemLink">Web Search</a></li>
<li style="width:15%;"><a href="/yacyinteractive.html" onclick="this.href='/yacyinteractive.html?handover='+document.searchform.search.value" class="MenuItemLink">File Search</a></li>
<li style="width:15%;"><a href="/HostBrowser.html?hosts=" class="MenuItemLink">HostBrowser</a></li>
<li style="width:15%;"><a href="/Network.html?menu=simple" class="MenuItemLink">Search Network</a></li>
<li style="width:15%;"><a href="/ViewProfile.html?hash=localhash" class="MenuItemLink">Peer Owner Profile</a></li>
<li style="width:9%;"><a href="http://www.yacy-websearch.net/wiki/" class="MenuItemLink">Help / YaCy Wiki</a></li>
</ul>
</div>
<div style="clear:left; padding:10px;">

@ -44,7 +44,7 @@ document.write("\<a id=\"rsslink\" href=\"yacysearch.rss?" + window.location.sea
The query format is similar to <a href="http://www.loc.gov/standards/sru/">SRU</a>.
Click the API icon to see an example call to the search rss API.
To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de/wiki/index.php/Dev:API">API wiki page</a>.</span>
</div>
</div>
#%env/templates/simpleheader.template%#
#(/topmenu)#
<div>

@ -34,6 +34,7 @@ import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@ -826,6 +827,25 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
return a;
}
/**
 * Collect the link URLs stored in a Solr document.
 * Each URL is assembled as protocol + "://" + url stub, taken from the
 * inbound or the outbound link fields of the document. Everything from
 * a '#' onwards is cut off, and duplicates are dropped while the order
 * of first occurrence is kept.
 * @param doc the Solr document to read the link fields from
 * @param inbound if true the inboundlinks fields are read, otherwise the outboundlinks fields
 * @return an iterator over the assembled URLs; it is empty if the url stub field has no values
 */
public static Iterator<String> getLinks(SolrDocument doc, boolean inbound) {
    final LinkedHashSet<String> links = new LinkedHashSet<String>();

    // without url stubs there is nothing to assemble
    final String stubFieldName = (inbound ? YaCySchema.inboundlinks_urlstub_txt : YaCySchema.outboundlinks_urlstub_txt).name();
    final Collection<Object> stubs = doc.getFieldValues(stubFieldName);
    if (stubs == null) {
        return links.iterator();
    }

    // the protocol list is requested with the same dimension as the stub list
    final String protocolFieldName = (inbound ? YaCySchema.inboundlinks_protocol_sxt : YaCySchema.outboundlinks_protocol_sxt).name();
    final Collection<String> protocols = indexedList2protocolList(doc.getFieldValues(protocolFieldName), stubs.size());
    if (protocols == null) {
        return links.iterator();
    }
    assert protocols.size() == stubs.size();

    // walk both lists in parallel; position n of one belongs to position n of the other
    final Object[] protocolArray = protocols.toArray();
    final Object[] stubArray = stubs.toArray();
    for (int pos = 0; pos < protocolArray.length; pos++) {
        final String url = ((String) protocolArray[pos]) + "://" + ((String) stubArray[pos]);
        final int fragmentStart = url.indexOf('#');
        // strip the fragment part, if there is one
        links.add(fragmentStart > 0 ? url.substring(0, fragmentStart) : url);
    }
    return links.iterator();
}
/**
* encode a string containing attributes from anchor rel properties binary:
* bit 0: "me" contained in rel

Loading…
Cancel
Save