turned HostBrowser into an admin-only page, now called IndexBrowser

This was required because spiders and bots crawled through this page and
created load on the peer without any benefit for the user or the YaCy network.
pull/402/head
Michael Peter Christen 4 years ago
parent d359d521a1
commit c0d9a3e9a7

@ -51,7 +51,7 @@ url_file_ext_s
## either the second level domain or, if a ccSLD is used, the third level domain. Needed to search in the url
host_organization_s
## internal links, only the protocol. Needed for HostBrowser
## internal links, only the protocol. Needed for IndexBrowser
inboundlinks_protocol_sxt
## internal links, the url only without the protocol. For correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed

@ -939,7 +939,7 @@ search.result.show.citation = true
search.result.show.pictures = false
search.result.show.cache = true
search.result.show.proxy = false
search.result.show.hostbrowser = true
search.result.show.indexbrowser = true
search.result.show.vocabulary = false
# Set of comma separated vocabulary names not to be used as search results facets
search.result.show.vocabulary.omit =
@ -1142,7 +1142,7 @@ svnRevision=0
currentSkin=pdblue
# flag to show if pages shall be usable for non-admin users
# this can be applied to the Surftips.html, yacysearch.html and HostBrowser.html pages
# this can be applied to the Surftips.html, yacysearch.html and IndexBrowser_p.html pages
publicSurftips = true
publicSearchpage = true

@ -294,7 +294,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="picturesLink" href="yacysearch.html" target="LayouTest">Pictures</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="cacheLink" href="CacheResource_p.html" target="LayouTest">Cache</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="proxyLink" href="proxy.html" target="LayouTest">View via Proxy</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="browseIndexLink" href="HostBrowser.html" target="LayouTest"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="browseIndexLink" href="IndexBrowser_p.html" target="LayouTest"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="snapshotsLink" href="yacysearch.html" target="LayouTest">JPG Snapshot</a></td>
#(search.result.show.ranking)#::<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: 1.12195955E9</span>#(/search.result.show.ranking)#
</tr>
@ -314,7 +314,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
menu: System Administration > Advanced Settings
</span></span>
</td>
<td style="text-align:center;"><input type="checkbox" name="search.result.show.hostbrowser" aria-labelledby="browseIndexLink" value="true" #(search.result.show.hostbrowser)#::checked="checked" #(/search.result.show.hostbrowser)# /></td>
<td style="text-align:center;"><input type="checkbox" name="search.result.show.indexbrowser" aria-labelledby="browseIndexLink" value="true" #(search.result.show.indexbrowser)#::checked="checked" #(/search.result.show.indexbrowser)# /></td>
<td style="text-align:center;"><input type="checkbox" name="search.result.show.snapshots" aria-labelledby="snapshotsLink" value="true" #(search.result.show.snapshots)#::checked="checked" #(/search.result.show.snapshots)# /></td>
#(search.result.show.ranking)#::<td style="text-align:center;">
<span class="info" style="padding-left: 10px"><img src="env/grafics/i16.gif" alt="info"/><span>

@ -97,7 +97,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures"));
sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache"));
sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy"));
sb.setConfig("search.result.show.hostbrowser", post.getBoolean("search.result.show.hostbrowser"));
sb.setConfig("search.result.show.indexbrowser", post.getBoolean("search.result.show.indexbrowser"));
sb.setConfig("search.result.show.snapshots", post.getBoolean("search.result.show.snapshots"));
// construct navigation String
@ -187,7 +187,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.pictures", config.getProperty("search.result.show.pictures","false"));
sb.setConfig("search.result.show.cache", config.getProperty("search.result.show.cache","true"));
sb.setConfig("search.result.show.proxy", config.getProperty("search.result.show.proxy","false"));
sb.setConfig("search.result.show.hostbrowser", config.getProperty("search.result.show.hostbrowser","true"));
sb.setConfig("search.result.show.indexbrowser", config.getProperty("search.result.show.indexbrowser","true"));
sb.setConfig("search.result.show.snapshots", config.getProperty("search.result.show.snapshots","true"));
sb.setConfig(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT,
config.getProperty(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT,
@ -247,7 +247,7 @@ public class ConfigSearchPage_p {
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0);
prop.put("search.result.show.hostbrowser", sb.getConfigBool("search.result.show.hostbrowser", false) ? 1 : 0);
prop.put("search.result.show.indexbrowser", sb.getConfigBool("search.result.show.indexbrowser", false) ? 1 : 0);
prop.put("search.result.show.snapshots", sb.getConfigBool("search.result.show.snapshots", false) ? 1 : 0);
prop.put("search.result.show.ranking", sb.getConfigBool(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING_DEFAULT) ? 1 : 0);

@ -1,8 +0,0 @@
#(hosts)#::
<form action="HostBrowser.html" method="get">
<fieldset><legend>Administration Options</legend>
<div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px " type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
</fieldset>
</form>
#(/hosts)#

@ -47,7 +47,7 @@ function updatepage(str) {
html += "<tbody>";
for (var i = 0; i < firstChannel.items.length; i++) {
item = firstChannel.items[i];
html += "<tr class=\"TableCellLight\"><td style=\"padding: 2px;text-align:left;\"><a href=\"HostBrowser.html?admin=#[admin]#&pathsearch=&amp;path=" + item.link + "\">" + item.link + "<\/a><\/td>";
html += "<tr class=\"TableCellLight\"><td style=\"padding: 2px;text-align:left;\"><a href=\"IndexBrowser_p.html?pathsearch=&amp;path=" + item.link + "\">" + item.link + "<\/a><\/td>";
}
html += "</tbody><\/table>";
}
@ -60,27 +60,12 @@ function updatepage(str) {
<link href="env/hypertree.css" rel="stylesheet">
</head>
<body id="IndexControl">
#(topmenu)#
<div class="SubMenu">
<ul class="SubMenu">
<li style="width:15%;"><a class="MenuItemLink" href="index.html"><img src="env/grafics/navsl.gif" height="10px" style="padding-right:10px" align="baseline"/>back to start page</a></li>
</ul>
</div>
#%env/templates/embeddedheader.template%#
::
#%env/templates/simpleheader.template%#
<script type="text/javascript">
document.getElementById("header_hostbrowser").className += " active";
</script>
::
#%env/templates/header.template%#
#%env/templates/header.template%#
#%env/templates/submenuWebStructure.template%#
#(/topmenu)#
<h1>Index Browser</h1>
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="HostBrowser.html?admin=#[admin]#&hosts=">all hosts</a>#(authorized)#::, <a href="HostBrowser.html?admin=#[admin]#&hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="HostBrowser.html?admin=#[admin]#&hosts=error">only with load errors</a>#(/authorized)#.</p>
<form action="HostBrowser.html" id="searchform" method="get" role="search">
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="IndexBrowser_p.html?hosts=">all hosts</a>#(authorized)#::, <a href="IndexBrowser_p.html?hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="IndexBrowser_p.html?hosts=error">only with load errors</a>#(/authorized)#.</p>
<form action="IndexBrowser_p.html" id="searchform" method="get" role="search">
<fieldset class="yacys">
<input type="hidden" name="admin" id="admin" value="#[admin]#" />
<div class="input-group" style="width:600px;float:left;">
<label for="search" class="input-group-addon">Host/URL</label>
<input id="search" type="search" name="path" value="#[path]#" maxlength="250" class="form-control" onkeyup="xmlhttpPost(); return false;"/>
@ -96,17 +81,17 @@ function updatepage(str) {
</fieldset>
</form>
#[result]#
#(hosts)#::
<fieldset><legend>Host List</legend>
<ol style="list-style-type : none; padding-left: 0px">
#{list}#
<li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8">&nbsp;<a href="HostBrowser.html?admin=#[admin]#&path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div>
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8">&nbsp;<a href="IndexBrowser_p.html?path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div>
<div style="width:120px; text-align:right; float: left; white-space:nowrap; overflow:hidden;"><span class="commit" aria-label="#[count]# valid">#[count]#</span>#(crawler)#::/<span class="pending" aria-label="#[pending]# pending">#[pending]#</span>#(/crawler)##(errors)#::/<span class="info" aria-label="#[exclcount]# excluded">#[exclcount]#</span>/<span class="error" aria-label="#[failcount]# failed">#[failcount]#</span>#(/errors)# URLs</div>
</li>
#{/list}#
</ol>
</ol>
<div style="clear:both; float:left; padding:10px 5px 1px 5px;">
<span>Count Colors:</span>
<span class="commit">&nbsp;&nbsp;&nbsp;Documents without Errors</span>
@ -117,7 +102,7 @@ function updatepage(str) {
#(/authorized)#
</div>
</fieldset>
<link rel="stylesheet" href="env/morris.css">
<script src="js/raphael.min.js"></script>
<script src="js/morris.js"></script>
@ -125,100 +110,100 @@ function updatepage(str) {
<div id="graph" style="height:200px"></div>
<script>
var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) {
dates_in_content_dts = data.facet_counts.facet_fields.dates_in_content_dts;
var parsed = [];
dates_in_content_dts = data.facet_counts.facet_fields.dates_in_content_dts;
var parsed = [];
for (var i = 0; i < dates_in_content_dts.length; i = i + 2) {
var date = dates_in_content_dts[i];
var count = dates_in_content_dts[i + 1];
if (date && count) {parsed[parsed.length] = {x: date,y: count};};
};
if (parsed.length > 0) {
var histogram = Morris.Bar({
element: 'graph',
data: parsed,
xkey: 'x',
ykeys: ['y'],
labels: ['number of documents about this date'],
yLabelFormat: function (y) { return y.toString() + ' docs'; },
barColors: function (row, series, type) {
var d = new Date(row.label);
if (d.getDay() === 6) return '#4aaf46'; //saturday
if (d.getDay() === 0) return '#4aaf46'; //sunday
return '#3574c0';
},
hideHover: 'false'
}).on('click', function(i, row) {
console.log(i, row);
});
/* Add keyboard navigation support and accessible attributes */
makeAccessibleMorrisBar(histogram,
"Number of documents per date histogram",
function(data) {return data.x + " : " + data.y + " docs"});
var histogram = Morris.Bar({
element: 'graph',
data: parsed,
xkey: 'x',
ykeys: ['y'],
labels: ['number of documents about this date'],
yLabelFormat: function (y) { return y.toString() + ' docs'; },
barColors: function (row, series, type) {
var d = new Date(row.label);
if (d.getDay() === 6) return '#4aaf46'; //saturday
if (d.getDay() === 0) return '#4aaf46'; //sunday
return '#3574c0';
},
hideHover: 'false'
}).on('click', function(i, row) {
console.log(i, row);
});
/* Add keyboard navigation support and accessible attributes */
makeAccessibleMorrisBar(histogram,
"Number of documents per date histogram",
function(data) {return data.x + " : " + data.y + " docs"});
}
});
</script>
#(/hosts)#
#(hostanalysis)#::
<fieldset><legend>Host Analysis</legend>
#{facets}#
<table class="sortable" style="float:left; border-width: 0">
<tr class="TableCellDark">
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[facetname]#</td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok">#</td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok">#</td>
</tr>
#{facet}#
<tr class="TableCellLight">
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[key]#</td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok"><a href="#[a]#" target="_blank" class="forceNoExternalIcon">#[count]#</a></td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok"><a href="#[a]#" target="_blank" class="forceNoExternalIcon">#[count]#</a></td>
</tr>
#{/facet}#
</table>&nbsp;&nbsp;
#{/facets}#
</fieldset>
#(/hostanalysis)#
#(files)#::
<fieldset><legend>Browser for <a href="#[path]#" target="_blank">#[path]#</a></legend>
<p>documents stored for host: #[hostsize]#; documents stored for subpath: #[subpathloadsize]#; unloaded documents detected in subpath: #[subpathdetectedsize]# <!-- #(complete)#;<a href="HostBrowser.html?admin=#[admin]#&complete=true&path=#[path]#">get complete list</a>::<a href="HostBrowser.html?admin=#[admin]#&path=#[path]#">directory view</a>#(/complete)#-->
<p>documents stored for host: #[hostsize]#; documents stored for subpath: #[subpathloadsize]#; unloaded documents detected in subpath: #[subpathdetectedsize]# <!-- #(complete)#;<a href="IndexBrowser_p.html?complete=true&path=#[path]#">get complete list</a>::<a href="IndexBrowser_p.html?path=#[path]#">directory view</a>#(/complete)#-->
</p>
<table class="sortable" style="float:left; border-width: 0">
<thead>
<tr>
<th style="text-align:center; width:32px"></th>
<th style="text-align:left; width: 600px" class="listing">Path</th>
<th style="text-align:right; padding:2px;" class="listingem">stored</th>
<th style="text-align:right; padding:2px;" class="listingem">linked</th>
<th style="text-align:right; padding:2px;" class="listingem">pending</th>
<th style="text-align:right; padding:2px;" class="listingem">excluded</th>
<th style="text-align:right; padding:2px;" class="listingem">failed</th>
</tr>
<tr>
<th style="text-align:center; width:32px"></th>
<th style="text-align:left; width: 600px" class="listing">Path</th>
<th style="text-align:right; padding:2px;" class="listingem">stored</th>
<th style="text-align:right; padding:2px;" class="listingem">linked</th>
<th style="text-align:right; padding:2px;" class="listingem">pending</th>
<th style="text-align:right; padding:2px;" class="listingem">excluded</th>
<th style="text-align:right; padding:2px;" class="listingem">failed</th>
</tr>
</thead>
<tbody>
#(root)#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"></td>
<td style="text-align:left;white-space:nowrap"><a href="HostBrowser.html?admin=#[admin]#&path=#[path]#" class="listing">..</a></td>
<td style="text-align:left;white-space:nowrap"><a href="IndexBrowser_p.html?path=#[path]#" class="listing">..</a></td>
<td style="text-align:right;white-space:nowrap" colspan="5"></td>
</tr>::
#(/root)#
#{list}#
#(type)#<!--file-->
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"><div><a href="ViewFile.html?url=#[url]#"><img src="env/grafics/doc.gif" alt=""/></a><span>Show Metadata</span></div></td>
<td style="text-align:center"><div><a href="ViewFile.html?url=#[url]#"><img src="env/grafics/doc.gif" alt=""/><span>Metadata</span></a></div></td>
<td style="text-align:left;white-space:nowrap" class=#(stored)#"listingem"::"listing"#(/stored)#><a href="#[url]#" target="_blank">#[url]#</a></td>
#(stored)#
#(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="HostBrowser.html?admin=#[admin]#&load=#[url]#&path=#[path]#">load &amp; index</a>#(/load)#</td>::
#(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="IndexBrowser_p.html?load=#[url]#&path=#[path]#">load &amp; index</a>#(/load)#</td>::
<td style="text-align:left;white-space:nowrap" colspan="1" class="listingok">indexed</td><td style="text-align:left;white-space:nowrap" colspan="4" class="listingem">#[comment]#</td>::
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">loading</td>::
<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok">#[error]#</td>
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">loading</td>::
<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok">#[error]#</td>
#(/stored)#
</tr>::<!--folder-->
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"><img src="env/grafics/dir.gif" alt="Directory"/></td>
<td style="text-align:left;white-space:nowrap" class="listing"><a href="HostBrowser.html?admin=#[admin]#&path=#[url]#" class="listing">#[url]#</a></td>
<td style="text-align:left;white-space:nowrap" class="listing"><a href="IndexBrowser_p.html?path=#[url]#" class="listing">#[url]#</a></td>
<td style="text-align:right" class="commit">#[stored]#</td>
<td style="text-align:right" class="listing">#[linked]#</td>
<td style="text-align:right" #(pendingVisible)#class="listingem"::class="pending"#(/pendingVisible)#>#[pending]#</td>
@ -231,7 +216,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
</table>
</fieldset>
#(linkgraph)#<div style="text-align:center"><form><input name="showlinkstructure" onClick="location.href = location.toString() + '&showlinkstructure=';" class="btn btn-default btn-xs" value="show link structure graph"/></form></div>::
#(linkgraph)#<div style="text-align:center"><form><input name="showlinkstructure" onClick="location.href = location.toString() + '&showlinkstructure=';" class="btn btn-default btn-xs" value="show link graph"/></form></div>::
<script src="js/d3.v5.min.js"></script>
<script src="js/hypertree.js"></script>
<div id="linkstructure"></div>
@ -246,38 +231,38 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
<ol style="list-style-type : none; padding-left: 0px">
#{list}#
<li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="HostBrowser.html?admin=#[admin]#&path=#[link]#">#[host]#</a></div></div>
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="IndexBrowser_p.html?path=#[link]#">#[host]#</a></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</li>
#{/list}#
</ol>
#(admin)#::
<p style="clear:both"><br/>
<img src="WebStructurePicture_p.png?host=#[host]#&depth=3&width=1024&height=576&nodes=200&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333">
</p>
#(/admin)#
</fieldset>
#(/outbound)#
#(inbound)#::
<fieldset><legend>Inbound Links, incoming to #[host]# - Host List</legend>
<ol style="list-style-type : none; padding-left: 0px">
#{list}#
<li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="HostBrowser.html?admin=#[admin]#&path=#[host]#">#[host]#</a></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</li>
#{list}#
<li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="IndexBrowser_p.html?path=#[host]#">#[host]#</a></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</li>
#{/list}#
</ol>
</fieldset>
#(/inbound)#
#(authorized)#::
#(admin)#::
#%HostBrowserAdmin_p.html%#
#(/admin)#
#(/authorized)#
#(hosts)#::
<form action="IndexBrowser_p.html" method="get">
<fieldset><legend>Administration Options</legend>
<div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px " type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
</fieldset>
</form>
#(/hosts)#
#%env/templates/footer.template%#
</body>
</html>

@ -1,5 +1,5 @@
/**
* HostBrowser
* IndexBrowser
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 27.09.2012 at http://yacy.net
*
@ -71,14 +71,14 @@ import net.yacy.server.serverSwitch;
/**
* Browser for indexed resources
*/
public class HostBrowser {
public class IndexBrowser_p {
final static long TIMEOUT = 10000L;
public static enum StoreType {
LINK, INDEX, EXCLUDED, FAILED, RELOAD;
}
/**
* <p>Retrieve local index entries for a path, or for hosts with the most references. Also allow some maintaining operations on entries with load errors.</p>
* <p>Some parameters need administrator authentication or unauthenticated local host requests to be allowed : load, deleteLoadErrors, delete, reload404,
@ -87,36 +87,34 @@ public class HostBrowser {
* <p>
* Configuration settings :
* <ul>
* <li>browser.autoload : allow the administrator to stack URLs to the local crawl queue, manually with the "load" parameter,
* or automatically when the "path" parameter is filled with an unknown URL</li>
* <li>browser.autoload : allow the administrator to stack URLs to the local crawl queue, manually with the "load" parameter,
* or automatically when the "path" parameter is filled with an unknown URL</li>
* <li>browser.load4everyone : allow everyone to stack URLs to the local crawl queue.
* "browser.autoload" has also to be set to true to enable automatic loading on an unknown path</li>
* "browser.autoload" has also to be set to true to enable automatic loading on an unknown path</li>
* <li>publicSearchpage : set to false to restrict use of this servlet to authenticated administrator only</li>
* <li>publicTopmenu : set to false to hide the top navigation bar to non authenticated users</li>
* <li>decoration.hostanalysis : add supplementary hosts information for debug/analysis purpose</li>
* <li>decoration.grafics.linkstructure : display a link structure graph when the path parameter is filled</li>
* </ul>
* </p>
* @param header servlet request header
* @param post request parameters. Supported keys :<ul>
* <li>admin : when "true", display in the html page render the administration context (menu and top navbar)</li>
* <li>path : root URL or host name to browse (ignored when the hosts parameter is filled). When not yet locally indexed, this URL can be automatically crawled and indexed
* when "browser.autoload" or "browser.load4everyone" configuration settings are set to true.</li>
* <li>load : URL to crawl and index.</li>
* <li>deleteLoadErrors : delete from the local index documents with load error (HTTP status different from 200 or any other failure).</li>
* <li>hosts : generate hosts with most references list. Supported values :
* <ul>
* <li>"crawling" : restrict to hosts currently being crawled</li>
* <li>"error" : restrict to hosts having at least one resource load error</li>
* </ul>
* </li>
* <li>delete : delete from the index whole documents tree matching the path prefix</li>
* <li>reload404 : reload documents matching the path prefix and which previously failed to load due to a network error</li>
* <li>facetcount : </li>
* <li>complete : we want only root paths for complete lists</li>
* <li>nepr :</li>
* <li>showlinkstructure : when present, display a link graph for path</li>
* </ul>
* <li>path : root URL or host name to browse (ignored when the hosts parameter is filled). When not yet locally indexed, this URL can be automatically crawled and indexed
* when "browser.autoload" or "browser.load4everyone" configuration settings are set to true.</li>
* <li>load : URL to crawl and index.</li>
* <li>deleteLoadErrors : delete from the local index documents with load error (HTTP status different from 200 or any other failure).</li>
* <li>hosts : generate hosts with most references list. Supported values :
* <ul>
* <li>"crawling" : restrict to hosts currently being crawled</li>
* <li>"error" : restrict to hosts having at least one resource load error</li>
* </ul>
* </li>
* <li>delete : delete from the index whole documents tree matching the path prefix</li>
* <li>reload404 : reload documents matching the path prefix and which previously failed to load due to a network error</li>
* <li>facetcount : </li>
* <li>complete : we want only root paths for complete lists</li>
* <li>nepr :</li>
* <li>showlinkstructure : when present, display a link graph for path</li>
* </ul>
* @param env server environment
* @return the servlet answer object
*/
@ -125,53 +123,23 @@ public class HostBrowser {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
Fulltext fulltext = sb.index.fulltext();
final boolean authorized = sb.verifyAuthentication(header);
final boolean autoload = authorized && sb.getConfigBool("browser.autoload", true);
final boolean autoload = sb.getConfigBool("browser.autoload", true);
final boolean load4everyone = sb.getConfigBool("browser.load4everyone", false);
final boolean loadRight = autoload || load4everyone; // add config later
final boolean searchAllowed = sb.getConfigBool(SwitchboardConstants.PUBLIC_SEARCHPAGE, true) || authorized;
final serverObjects prop = new serverObjects();
// set default values
prop.put("path", "");
prop.put("result", "");
prop.put("hosts", 0);
prop.put("files", 0);
prop.put("hostanalysis", 0);
prop.put("admin", "false");
boolean admin = false;
String referer = header.get("Referer", "");
if ((post != null && post.getBoolean("admin")) || referer.contains("HostBrowser.html?admin=true")) {
prop.put("topmenu", 2);
prop.put("admin", "true");
admin = true;
} else if (authorized) { // show top nav to admins
prop.put("topmenu", 1);
} else { // for other respect setting in Search Design Configuration
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
}
final String promoteSearchPageGreeting =
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ?
env.getConfig("network.unit.description", "") :
env.getConfig(SwitchboardConstants.GREETING, "");
prop.put("topmenu_promoteSearchPageGreeting", promoteSearchPageGreeting);
if (!searchAllowed) {
prop.put("result", "You are not allowed to use this page. Please ask an administrator for permission.");
prop.putNum("ucount", 0);
return prop;
}
if(authorized) {
/* Fill the "admin" parameter for authorized links */
prop.put("authorized_admin", Boolean.toString(admin));
}
String path = post == null ? "" : post.get("path", "").trim();
if (authorized) sb.index.fulltext().commit(true);
sb.index.fulltext().commit(true);
if (post == null || env == null) {
prop.putNum("ucount", fulltext.collectionSize());
return prop;
@ -186,8 +154,8 @@ public class HostBrowser {
!path.startsWith("smb://") &&
!path.startsWith("file://"))) { path = "http://" + path; }
prop.putHTML("path", path);
prop.put("delete", authorized && path.length() > 0 ? 1 : 0);
prop.put("delete", path.length() > 0 ? 1 : 0);
DigestURL pathURI = null;
try {pathURI = new DigestURL(path);} catch (final MalformedURLException e) {}
@ -231,61 +199,60 @@ public class HostBrowser {
}
}
if (authorized && post.containsKey("deleteLoadErrors")) {
if (post.containsKey("deleteLoadErrors")) {
try {
fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
+ CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200");
ConcurrentLog.info ("IndexBrowser_p:", "delete documents with httpstatus_i <> 200");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail");
ConcurrentLog.info ("IndexBrowser_p:", "delete documents with failtype_s = fail");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = excl");
ConcurrentLog.info ("IndexBrowser_p:", "delete documents with failtype_s = excl");
prop.putNum("ucount", fulltext.collectionSize());
return prop;
} catch (final IOException ex) {
ConcurrentLog.logException(ex);
}
}
if (post.containsKey("hosts")) {
// generate host list
try {
boolean onlyCrawling = "crawling".equals(post.get("hosts", ""));
boolean onlyErrors = "error".equals(post.get("hosts", ""));
int maxcount = authorized ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
int maxcount = 2 * 3 * 2 * 5 * 7 * 2 * 3; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>(true);
// collect hosts from crawler
final Map<String, Integer[]> crawler = (authorized) ? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots) : new HashMap<String, Integer[]>();
final Map<String, Integer[]> crawler = sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots);
final Map<String, Integer> hostNameToPendingCount = new HashMap<>();
for(Entry<String, Integer[]>crawlerEntry: crawler.entrySet()) {
/* The local stack returns keys composed of "hostname:port" : we now sum pending URLs counts by host name */
String hostName = Domains.stripToHostName(crawlerEntry.getKey());
Integer pendingCount = hostNameToPendingCount.get(hostName);
if(pendingCount == null) {
pendingCount = 0;
}
pendingCount += crawlerEntry.getValue()[0];
hostNameToPendingCount.put(hostName, pendingCount);
String hostName = Domains.stripToHostName(crawlerEntry.getKey());
Integer pendingCount = hostNameToPendingCount.get(hostName);
if(pendingCount == null) {
pendingCount = 0;
}
pendingCount += crawlerEntry.getValue()[0];
hostNameToPendingCount.put(hostName, pendingCount);
}
// collect the errorurls
Map<String, ReversibleScoreMap<String>> exclfacets = authorized ? fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(), maxcount, CollectionSchema.host_s.getSolrFieldName()) : null;
Map<String, ReversibleScoreMap<String>> exclfacets = fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(), maxcount, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> exclscore = exclfacets == null ? new ClusteredScoreMap<String>(true) : exclfacets.get(CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> failfacets = authorized ? fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(), maxcount, CollectionSchema.host_s.getSolrFieldName()) : null;
Map<String, ReversibleScoreMap<String>> failfacets = fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(), maxcount, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> failscore = failfacets == null ? new ClusteredScoreMap<String>(true) : failfacets.get(CollectionSchema.host_s.getSolrFieldName());
int c = 0;
Iterator<String> i = hostscore.keys(false);
String host;
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("hosts_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("hosts_list_" + c + "_host", host);
boolean inCrawler = hostNameToPendingCount.containsKey(host);
int exclcount = exclscore.get(host);
@ -294,7 +261,7 @@ public class HostBrowser {
prop.put("hosts_list_" + c + "_count", hostscore.get(host));
prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0);
if (inCrawler) {
prop.put("hosts_list_" + c + "_crawler_pending", hostNameToPendingCount.get(host));
prop.put("hosts_list_" + c + "_crawler_pending", hostNameToPendingCount.get(host));
}
prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0);
if (errors > 0) {
@ -311,18 +278,18 @@ public class HostBrowser {
}
}
prop.put("hosts_list", c);
prop.put("hosts_authorized", authorized ? 1 : 0);
prop.put("hosts_authorized", 1);
prop.put("hosts", 1);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
if (path.length() > 0) {
try {
DigestURL uri = new DigestURL(path);
String host = uri.getHost();
// write host analysis if path after host is empty
if (uri.getPath().length() <= 1 && host != null && host.length() > 0 && sb.getConfigBool("decoration.hostanalysis", false)) {
//how many documents per crawldepth_i; get crawldepth_i facet for host
@ -364,27 +331,25 @@ public class HostBrowser {
prop.put("hostanalysis_facets", fc);
prop.put("hostanalysis", 1);
}
// write file list for subpath
boolean delete = false;
boolean reload404 = false;
if (authorized && post.containsKey("delete")) {
if (post.containsKey("delete")) {
// delete the complete path!! That includes everything that matches with this prefix.
delete = true;
}
if (authorized && post.containsKey("reload404")) {
if (post.containsKey("reload404")) {
// try to re-load all urls that have load errors and matches with this prefix.
reload404 = true;
}
int facetcount=post.getInt("facetcount", 0);
int facetcount = post.getInt("facetcount", 0);
boolean complete = post.getBoolean("complete");
if (complete) { // we want only root paths for complete lists
p = path.indexOf('/', 10);
if (p > 0) path = path.substring(0, p + 1);
}
prop.put("files_complete", complete ? 1 : 0);
prop.put("files_complete_admin", admin ? "true" : "false");
prop.putHTML("files_complete_path", path);
p = path.substring(0, path.length() - 1).lastIndexOf('/');
if (p < 8) {
@ -392,15 +357,13 @@ public class HostBrowser {
} else {
prop.put("files_root", 0);
prop.putHTML("files_root_path", path.substring(0, p + 1));
prop.put("files_root_admin", admin ? "true" : "false");
}
// generate file list from path
prop.putHTML("outbound_host", host);
if (authorized) prop.putHTML("outbound_admin_host", host); //used for WebStructurePicture_p link
prop.putHTML("inbound_host", host);
String hosthash = uri.hosthash();
String[] pathparts = uri.getPaths();
// get all files for a specific host from the index
StringBuilder q = new StringBuilder();
if (host == null) {
@ -438,7 +401,7 @@ public class HostBrowser {
CollectionSchema.references_external_i.getSolrFieldName(),
CollectionSchema.references_exthosts_i.getSolrFieldName(),
CollectionSchema.cr_host_chance_d.getSolrFieldName(),
CollectionSchema.cr_host_norm_i.getSolrFieldName()
CollectionSchema.cr_host_norm_i.getSolrFieldName()
));
solrQueryTask.start();
Set<String> storedDocs = new HashSet<String>();
@ -455,68 +418,68 @@ public class HostBrowser {
long timeoutReferences = System.currentTimeMillis() + 6000;
ReferenceReportCache rrCache = sb.index.getReferenceReportCache();
try {
SolrDocument doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
while (doc != AbstractSolrConnector.POISON_DOCUMENT && doc != null) {
String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
FailType error = errortype == null ? null : FailType.valueOf(errortype);
String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids, System.currentTimeMillis() < timeoutReferences));
if (u.startsWith(path)) {
if (delete) {
deleteIDs.add(ids);
} else {
if (error == null) storedDocs.add(u); else {
if (reload404 && error == FailType.fail) {
ArrayList<String> collections = (ArrayList<String>) doc.getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName());
if (collections != null) reloadURLCollection.addAll(collections);
reloadURLs.add(u);
}
if (authorized) errorDocs.put(u, error);
}
}
} else if (complete) {
if (error == null) storedDocs.add(u); else {
if (authorized) errorDocs.put(u, error);
}
}
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link
if (error == null) {
hostsize++;
// collect inboundlinks to browse the host
Iterator<String> links = URIMetadataNode.getLinks(doc, true);
while (links.hasNext()) {
u = links.next();
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u);
}
// collect referrer links
links = URIMetadataNode.getLinks(doc, false);
while (links.hasNext()) {
u = links.next();
try {
MultiProtocolURL mu = new MultiProtocolURL(u);
if (mu.getHost() != null) {
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
if (lks == null) {
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
outboundHosts.put(mu.getHost(), lks);
}
lks.set(u, u.length());
}
} catch (final MalformedURLException e) {}
}
}
remainingTime = timeoutList - System.currentTimeMillis();
if (remainingTime <= 0) {
break;
}
doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
}
SolrDocument doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
while (doc != AbstractSolrConnector.POISON_DOCUMENT && doc != null) {
String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
FailType error = errortype == null ? null : FailType.valueOf(errortype);
String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids, System.currentTimeMillis() < timeoutReferences));
if (u.startsWith(path)) {
if (delete) {
deleteIDs.add(ids);
} else {
if (error == null) storedDocs.add(u); else {
if (reload404 && error == FailType.fail) {
ArrayList<String> collections = (ArrayList<String>) doc.getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName());
if (collections != null) reloadURLCollection.addAll(collections);
reloadURLs.add(u);
}
errorDocs.put(u, error);
}
}
} else if (complete) {
if (error == null) storedDocs.add(u); else {
errorDocs.put(u, error);
}
}
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link
if (error == null) {
hostsize++;
// collect inboundlinks to browse the host
Iterator<String> links = URIMetadataNode.getLinks(doc, true);
while (links.hasNext()) {
u = links.next();
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u);
}
// collect referrer links
links = URIMetadataNode.getLinks(doc, false);
while (links.hasNext()) {
u = links.next();
try {
MultiProtocolURL mu = new MultiProtocolURL(u);
if (mu.getHost() != null) {
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
if (lks == null) {
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
outboundHosts.put(mu.getHost(), lks);
}
lks.set(u, u.length());
}
} catch (final MalformedURLException e) {}
}
}
remainingTime = timeoutList - System.currentTimeMillis();
if (remainingTime <= 0) {
break;
}
doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
}
} finally {
/* Ensure termination and proper resources release of the query thread */
solrQueryTask.interrupt();
/* Ensure termination and proper resources release of the query thread */
solrQueryTask.interrupt();
}
if (deleteIDs.size() > 0) sb.remove(deleteIDs);
if (reloadURLs.size() > 0) {
@ -524,19 +487,19 @@ public class HostBrowser {
for (String collection: reloadURLCollection) cm.put(collection, QueryParams.catchall_pattern);
sb.reload(reloadURLs, cm.size() > 0 ? cm : CrawlProfile.collectionParser("user"), false);
}
// collect from crawler
List<Request> domainStackReferences = (authorized) ? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000) : new ArrayList<Request>(0);
List<Request> domainStackReferences = sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000);
Set<String> loadingLinks = new HashSet<String>();
for (Request crawlEntry: domainStackReferences) loadingLinks.add(crawlEntry.url().toNormalform(true));
// now combine all lists into one
Map<String, StoreType> files = new HashMap<String, StoreType>();
for (String u: storedDocs) files.put(u, StoreType.INDEX);
for (Map.Entry<String, FailType> e: errorDocs.entrySet()) files.put(e.getKey(), e.getValue() == FailType.fail ? StoreType.FAILED : StoreType.EXCLUDED);
for (String u: inboundLinks) if (!files.containsKey(u)) files.put(u, StoreType.LINK);
for (String u: loadingLinks) if (u.startsWith(path) && !files.containsKey(u)) files.put(u, StoreType.LINK);
ConcurrentLog.info("HostBrowser", "collected " + files.size() + " urls for path " + path);
ConcurrentLog.info("IndexBrowser_p", "collected " + files.size() + " urls for path " + path);
// distinguish files and folders
Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if object is boolean, it's a file; if it's an int[], then it's a folder
@ -575,7 +538,7 @@ public class HostBrowser {
}
}
}
int maxcount = 1000;
int c = 0;
// first list only folders
@ -587,7 +550,6 @@ public class HostBrowser {
// this is a folder
prop.put("files_list_" + c + "_type", 1);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
int linked = ((int[]) entry.getValue())[0];
int stored = ((int[]) entry.getValue())[1];
int crawler = ((int[]) entry.getValue())[2];
@ -610,7 +572,6 @@ public class HostBrowser {
// this is a file
prop.put("files_list_" + c + "_type", 0);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
StoreType type = (StoreType) entry.getValue();
try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors
@ -640,7 +601,6 @@ public class HostBrowser {
if (loadRight) {
prop.putHTML("files_list_" + c + "_type_stored_load_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_stored_load_path", path);
prop.putHTML("files_list_" + c + "_type_stored_load_admin", Boolean.toString(admin));
}
if (++c >= maxcount) break;
}
@ -669,7 +629,6 @@ public class HostBrowser {
Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("inbound_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host));
prop.put("inbound_list_" + c + "_count", score.get(host));
c++;
@ -679,7 +638,7 @@ public class HostBrowser {
} else {
prop.put("inbound", 0);
}
// generate outbound-links table
if (outboundHosts.size() > 0) {
maxcount = 200;
@ -692,7 +651,6 @@ public class HostBrowser {
prop.putHTML("outbound_list_" + c + "_host", host);
prop.put("outbound_list_" + c + "_count", score.get(host));
prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey());
prop.put("outbound_list_" + c + "_admin", admin ? "true" : "false");
c++;
}
prop.put("outbound_list", c);
@ -700,7 +658,7 @@ public class HostBrowser {
} else {
prop.put("outbound", 0);
}
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}

@ -1,5 +1,5 @@
<?xml version="1.0"?>
<hostbrowser>
<indexbrowser>
#(hosts)#::
<hosts>
#{list}#
@ -29,4 +29,4 @@
#{/list}#
</inbound>
#(/inbound)#
</hostbrowser>
</indexbrowser>

@ -84,7 +84,7 @@ function updatepage(str) {
<dd>
<input type="text" size="60" name="url" id="url" value="#[url]#" />
<input type="submit" name="show" class="btn btn-primary" value="Show Metadata" />
#(moar)#::<input type="button" value="Browse Host" class="btn btn-default" onClick="location.href='HostBrowser.html?path=' + document.getElementById('url').value" />#(/moar)#
#(moar)#::<input type="button" value="Browse Host" class="btn btn-default" onClick="location.href='IndexBrowser_p.html?path=' + document.getElementById('url').value" />#(/moar)#
<div id="searchresults"></div>
</dd>
#(moar)#::

@ -191,7 +191,7 @@
<li><h3>Monitoring</h3></li>
<li><a href="Status.html?noforward=" class="MenuItemLink">System Status</a></li>
#(navigation-p2p)#::<li><a href="Network.html" accesskey="w" class="MenuItemLink">Peer-to-Peer Network</a></li>#(/navigation-p2p)#
#(navigation-crawlmonitor)#::<li><a href="HostBrowser.html?admin=true&hosts=" class="MenuItemLink">Index Browser</a></li>#(/navigation-crawlmonitor)#
#(navigation-crawlmonitor)#::<li><a href="IndexBrowser_p.html?admin=true&hosts=" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Index Browser</a></li>#(/navigation-crawlmonitor)#
<li><a href="AccessGrid_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Network Access</a></li>
#(navigation-crawlmonitor)#::<li><a href="Crawler_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Crawler Monitor</a></li>#(/navigation-crawlmonitor)#
<!-- <li><a href="terminal_p.html" accesskey="t" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Terminal</a></li> -->

@ -40,7 +40,6 @@
<li id="header_websearch"><a href="index.html#(authSearch)#::?auth#(/authSearch)#" onclick="this.href='index.html?#(authSearch)#::auth&#(/authSearch)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li>
<li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li>
<li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li>
<li id="header_hostbrowser"><a href="HostBrowser.html?hosts=">Index Browser</a></li>
<li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li>
<!--<li><a href="yacysearch_location.html">Location Search</a></li>-->
<li class="divider" role="separator"></li>

@ -20,7 +20,6 @@
<li id="header_websearch"><a href="index.html#(authorized)#::?auth#(/authorized)#" onclick="this.href='index.html?#(authorized)#::auth&#(/authorized)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li>
<li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li>
<li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li>
<li id="header_hostbrowser"><a href="HostBrowser.html?hosts=">Index Browser</a></li>
<li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li>
<!--<li><a href="yacysearch_location.html">Location Search</a></li>-->
<li class="divider" role="separator"></li>

@ -1,7 +1,7 @@
<div class="SubMenu">
<h3>Web Visualization</h3>
<ul class="SubMenu">
<li><a href="HostBrowser.html?admin=true" class="MenuItemLink">Index Browser</a></li>
<li><a href="IndexBrowser_p.html?admin=true" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Index Browser</a></li>
<li><a href="WatchWebStructure_p.html?host=auto&amp;depth=3&amp;time=1000" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Web Structure</a></li>
<li><a href="Collage.html" class="MenuItemLink">Image Collage</a></li>
</ul>

@ -4,7 +4,7 @@ User-agent: *
Disallow: /*query=
Disallow: /proxy.html
Disallow: /HostBrowser.html
Disallow: /IndexBrowser_p.html
#(all)#

@ -47,7 +47,7 @@
#(showPictures)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="yacysearch.html?contentdom=image#(authSearch)#::&auth#(/authSearch)#&url=#[link]#&query=#[former]#+inurl:#[link]#" target="_blank">Pictures</a>#(/showPictures)#
#(showCache)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="CacheResource_p.html?url=#[link]#" target="_blank">Cache</a>#(/showCache)#
#(showProxy)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="proxy.html?url=#[link]#" target="_blank">View via proxy</a>#(/showProxy)#
#(showHostBrowser)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="HostBrowser.html?path=#[link]#"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a>#(/showHostBrowser)#
#(showIndexBrowser)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="IndexBrowser_p.html?path=#[link]#"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a>#(/showIndexBrowser)#
#(showVocabulary)#::<br/>#{vocabulary}##[name]#:#[terms]# #{/vocabulary}##(/showVocabulary)#
#(showSnapshots)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="#[link]#" target="_blank">#[extension]# Snapshot</a>#(/showSnapshots)#
#(showRanking)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: #[ranking]#</span>#(/showRanking)#

@ -276,7 +276,7 @@ public class yacysearchitem {
prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0);
prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) && Cache.has(resultURL.hash()) ? 1 : 0);
prop.put("content_showProxy", sb.getConfigBool("search.result.show.proxy", true) && sb.getConfigBool("proxyURL", false) ? 1 : 0);
prop.put("content_showHostBrowser", sb.getConfigBool("search.result.show.hostbrowser", true) ? 1 : 0);
prop.put("content_showIndexBrowser", sb.getConfigBool("search.result.show.indexbrowser", true) ? 1 : 0);
prop.put("content_showSnapshots", snapshotPaths != null && snapshotPaths.size() > 0 && sb.getConfigBool("search.result.show.snapshots", true) ? 1 : 0);
prop.put("content_showVocabulary", sb.getConfigBool("search.result.show.vocabulary", true) ? 1 : 0);
prop.put("content_showRanking", sb.getConfigBool("search.result.show.ranking", false) ? 1 : 0);
@ -331,7 +331,7 @@ public class yacysearchitem {
prop.putUrlEncodedHTML("content_showPictures_former", origQ);
prop.put("content_showCache_link", resultUrlstring);
prop.put("content_showProxy_link", resultUrlstring);
prop.put("content_showHostBrowser_link", resultUrlstring);
prop.put("content_showIndexBrowser_link", resultUrlstring);
if (sb.getConfigBool("search.result.show.vocabulary", true)) {
int c = 0;
for (String key: result.getFieldNames()) {

@ -1427,7 +1427,7 @@ More Tutorials==Mehr Tutorials
Please see the tutorials on==Bitte besuchen Sie auch die Anleitungen auf
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
#Index Browser==Index Browser
Browse the index of #[ucount]# documents.==Durchsuchen Sie den Index von #[ucount]# Dokumenten.
@ -1468,10 +1468,6 @@ Inbound Links, incoming to #[host]# - Host List==Eingehende Links, eingehend auf
##[count]# URLs==#[count]# URL(s)
#Administration Options==Administration Optionen
<html lang="en">==<html lang="de">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Administrator Optionen
Delete all==Lösche alle
>Load Errors<==>Ladefehler<

@ -668,7 +668,7 @@ Download from Vimeo==Descargar desde vimeo
More Tutorials==Más tutoriales
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
>all hosts<==>todos los hosts<
> or <==> o <
@ -679,10 +679,6 @@ Documents without Errors==Documentos sin errores
>Path<==>Ruta<
Administration Options==Opciones de Administración
<html lang="en">==<html lang="es">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Opciones de Administración
Delete all==Eliminar todo
>Load Errors<==>Errores de carga<

@ -1000,7 +1000,7 @@ This can be done using the <a href="CrawlStartExpert.html">Expert Crawl Start</a
The scheduler on crawls can be changed or removed using the <a href="Table_API_p.html">API Steering==Le planificateur de balayage peut &ecirc;tre modifi&eacute; ou supprim&eacute; au moyen de la <a href="Table_API_p.html">commande de l'API
#---------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
Browse the index of #[ucount]# documents.==Naviguer dans les #[ucount]# documents de l'index.
Enter a host or an URL for a file list or view a list of==Saisir un nom de domaine ou une URL pour obtenir une liste de fichiers, ou visualiser la liste de
@ -1041,10 +1041,6 @@ Inbound Links, incoming to #[host]# - Host List==Liens entrants, vers #[host]# -
'number of documents about this date'=='Nombre de documents liés à cette date'
"show link structure graph"=="Afficher le graphique d'arborescence de liens"
Host has load error(s)==Erreur(s) de chargement sur ce domaine
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Options d'administration
Delete all==Supprimer toutes les
>Load Errors<==>erreurs de chargement<

@ -647,7 +647,7 @@ Download from Vimeo==Scarica da Vimeo
More Tutorials==Altri tutorial
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
>all hosts<==>tutti gli host<
> or <==> o <
@ -658,10 +658,6 @@ Documents without Errors==Documenti privi di errori
>Path<==>Percorso<
Administration Options==Opzioni amministrazione
<html lang="en">==<html lang="it">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Opzioni di amministrazione
Delete all==Elimina tutti
>Load Errors<==>Errori di caricamento<

@ -829,14 +829,10 @@ YaCy: Tutorial==YaCy: チュートリアル
>Tutorial==>チュートリアル
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
Index Browser==索引ブラウザー
#Administration Options==管理オプション
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==管理オプション
Delete all==全部を削除する
>Load Errors<==>読み込みエラー<

@ -3987,7 +3987,7 @@
</body>
</file>
<file original="HostBrowser.html" source-language="en" datatype="html">
<file original="IndexBrowser_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="c472c59a" xml:space="preserve" approved="no" translate="yes">
<source>Index Browser</source>
@ -4106,11 +4106,6 @@
<trans-unit id="9c0d6fd2" xml:space="preserve" approved="no" translate="yes">
<source>Host has load error(s)</source>
</trans-unit>
</body>
</file>
<file original="HostBrowserAdmin_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="dfa6a8dc" xml:space="preserve" approved="no" translate="yes">
<source>Administration Options</source>
</trans-unit>

@ -1546,7 +1546,7 @@ More Tutorials==Больше инструкций
Please see the tutorials on==Пожалуйста, смотрите больше инструкций на
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
Index Browser==Просмотр хостов
Browse the index of #[ucount]# documents.==В индексе находится #[ucount]# документов.
@ -1590,10 +1590,6 @@ Inbound Links, incoming to #[host]# - Host List==Внутренние ссылк
#browse #[host]#==просмотр #[host]#
##[count]# URLs==#[count]# ссылок
<html lang="en">==<html lang="ru">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Расширенные опции
Delete all==Удалить все
>Load Errors<==>ошибки загрузки<

@ -1645,7 +1645,7 @@ You can create your own search index with YaCy==您可以用YaCy创建属于自
To learn how to do that, watch one of the demonstration videos below==观看以下demo视频以了解更多
#-----------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
Index Browser==索引浏览器
Browse the index of #[ucount]# documents.== 浏览来自 #[ucount]# 篇文档的索引.
@ -1685,10 +1685,6 @@ Inbound Links, incoming to #[host]# - Host List==入站链接,传入#[host]# -
'number of documents about this date'=='在这个日期的文件数量'
"show link structure graph"=="展示连接结构图"
Host has load error(s)==主机有加载错误项
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==管理选项
Delete all==全部删除
>Load Errors<==>加载错误<

@ -49,7 +49,7 @@ public enum CollectionSchema implements SchemaDeclaration {
httpstatus_i(SolrType.num_integer, true, true, false, false, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded", true),
url_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension", true),
host_organization_s(SolrType.string, true, true, false, false, true, "either the second level domain or, if a ccSLD is used, the third level domain", true), // needed to search in the url
inboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "internal links, the url only without the protocol", true), // needed for HostBrowser
inboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "internal links, the url only without the protocol", true), // needed for IndexBrowser
inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol", true), // for correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed
outboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "external links, only the protocol", true), // for correct assembly of outboundlinks outboundlinks_protocol_sxt + outboundlinks_urlstub_sxt is needed
outboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "external links, the url only without the protocol", true), // needed to enhance the crawler

Loading…
Cancel
Save