turned HostBrowser into an admin-only page, now called IndexBrowser

This was required because spiders and bots crawled through this page and
created load on the peer without any benefit to the user or the YaCy network.
pull/402/head
Michael Peter Christen 4 years ago
parent d359d521a1
commit c0d9a3e9a7

@ -51,7 +51,7 @@ url_file_ext_s
## either the second level domain or, if a ccSLD is used, the third level domain. Needed to search in the url ## either the second level domain or, if a ccSLD is used, the third level domain. Needed to search in the url
host_organization_s host_organization_s
## internal links, only the protocol. Needed for HostBrowser ## internal links, only the protocol. Needed for IndexBrowser
inboundlinks_protocol_sxt inboundlinks_protocol_sxt
## internal links, the url only without the protocol. For correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed ## internal links, the url only without the protocol. For correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed

@ -939,7 +939,7 @@ search.result.show.citation = true
search.result.show.pictures = false search.result.show.pictures = false
search.result.show.cache = true search.result.show.cache = true
search.result.show.proxy = false search.result.show.proxy = false
search.result.show.hostbrowser = true search.result.show.indexbrowser = true
search.result.show.vocabulary = false search.result.show.vocabulary = false
# Set of comma separated vocabulary names not to be used as search results facets # Set of comma separated vocabulary names not to be used as search results facets
search.result.show.vocabulary.omit = search.result.show.vocabulary.omit =
@ -1142,7 +1142,7 @@ svnRevision=0
currentSkin=pdblue currentSkin=pdblue
# flag to show if pages shall be usable for non-admin users # flag to show if pages shall be usable for non-admin users
# this can be applied to the Surftips.html, yacysearch.html and HostBrowser.html pages # this can be applied to the Surftips.html, yacysearch.html and IndexBrowser_p.html pages
publicSurftips = true publicSurftips = true
publicSearchpage = true publicSearchpage = true

@ -294,7 +294,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="picturesLink" href="yacysearch.html" target="LayouTest">Pictures</a></td> <td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="picturesLink" href="yacysearch.html" target="LayouTest">Pictures</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="cacheLink" href="CacheResource_p.html" target="LayouTest">Cache</a></td> <td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="cacheLink" href="CacheResource_p.html" target="LayouTest">Cache</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="proxyLink" href="proxy.html" target="LayouTest">View via Proxy</a></td> <td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="proxyLink" href="proxy.html" target="LayouTest">View via Proxy</a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="browseIndexLink" href="HostBrowser.html" target="LayouTest"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a></td> <td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="browseIndexLink" href="IndexBrowser_p.html" target="LayouTest"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a></td>
<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="snapshotsLink" href="yacysearch.html" target="LayouTest">JPG Snapshot</a></td> <td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a id="snapshotsLink" href="yacysearch.html" target="LayouTest">JPG Snapshot</a></td>
#(search.result.show.ranking)#::<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: 1.12195955E9</span>#(/search.result.show.ranking)# #(search.result.show.ranking)#::<td><span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: 1.12195955E9</span>#(/search.result.show.ranking)#
</tr> </tr>
@ -314,7 +314,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
menu: System Administration > Advanced Settings menu: System Administration > Advanced Settings
</span></span> </span></span>
</td> </td>
<td style="text-align:center;"><input type="checkbox" name="search.result.show.hostbrowser" aria-labelledby="browseIndexLink" value="true" #(search.result.show.hostbrowser)#::checked="checked" #(/search.result.show.hostbrowser)# /></td> <td style="text-align:center;"><input type="checkbox" name="search.result.show.indexbrowser" aria-labelledby="browseIndexLink" value="true" #(search.result.show.indexbrowser)#::checked="checked" #(/search.result.show.indexbrowser)# /></td>
<td style="text-align:center;"><input type="checkbox" name="search.result.show.snapshots" aria-labelledby="snapshotsLink" value="true" #(search.result.show.snapshots)#::checked="checked" #(/search.result.show.snapshots)# /></td> <td style="text-align:center;"><input type="checkbox" name="search.result.show.snapshots" aria-labelledby="snapshotsLink" value="true" #(search.result.show.snapshots)#::checked="checked" #(/search.result.show.snapshots)# /></td>
#(search.result.show.ranking)#::<td style="text-align:center;"> #(search.result.show.ranking)#::<td style="text-align:center;">
<span class="info" style="padding-left: 10px"><img src="env/grafics/i16.gif" alt="info"/><span> <span class="info" style="padding-left: 10px"><img src="env/grafics/i16.gif" alt="info"/><span>

@ -97,7 +97,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures")); sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures"));
sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache")); sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache"));
sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy")); sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy"));
sb.setConfig("search.result.show.hostbrowser", post.getBoolean("search.result.show.hostbrowser")); sb.setConfig("search.result.show.indexbrowser", post.getBoolean("search.result.show.indexbrowser"));
sb.setConfig("search.result.show.snapshots", post.getBoolean("search.result.show.snapshots")); sb.setConfig("search.result.show.snapshots", post.getBoolean("search.result.show.snapshots"));
// construct navigation String // construct navigation String
@ -187,7 +187,7 @@ public class ConfigSearchPage_p {
sb.setConfig("search.result.show.pictures", config.getProperty("search.result.show.pictures","false")); sb.setConfig("search.result.show.pictures", config.getProperty("search.result.show.pictures","false"));
sb.setConfig("search.result.show.cache", config.getProperty("search.result.show.cache","true")); sb.setConfig("search.result.show.cache", config.getProperty("search.result.show.cache","true"));
sb.setConfig("search.result.show.proxy", config.getProperty("search.result.show.proxy","false")); sb.setConfig("search.result.show.proxy", config.getProperty("search.result.show.proxy","false"));
sb.setConfig("search.result.show.hostbrowser", config.getProperty("search.result.show.hostbrowser","true")); sb.setConfig("search.result.show.indexbrowser", config.getProperty("search.result.show.indexbrowser","true"));
sb.setConfig("search.result.show.snapshots", config.getProperty("search.result.show.snapshots","true")); sb.setConfig("search.result.show.snapshots", config.getProperty("search.result.show.snapshots","true"));
sb.setConfig(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, sb.setConfig(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT,
config.getProperty(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT, config.getProperty(SwitchboardConstants.SEARCH_NAVIGATION_MAXCOUNT,
@ -247,7 +247,7 @@ public class ConfigSearchPage_p {
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0); prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0); prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0); prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0);
prop.put("search.result.show.hostbrowser", sb.getConfigBool("search.result.show.hostbrowser", false) ? 1 : 0); prop.put("search.result.show.indexbrowser", sb.getConfigBool("search.result.show.indexbrowser", false) ? 1 : 0);
prop.put("search.result.show.snapshots", sb.getConfigBool("search.result.show.snapshots", false) ? 1 : 0); prop.put("search.result.show.snapshots", sb.getConfigBool("search.result.show.snapshots", false) ? 1 : 0);
prop.put("search.result.show.ranking", sb.getConfigBool(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING_DEFAULT) ? 1 : 0); prop.put("search.result.show.ranking", sb.getConfigBool(SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING, SwitchboardConstants.SEARCH_RESULT_SHOW_RANKING_DEFAULT) ? 1 : 0);

@ -1,8 +0,0 @@
#(hosts)#::
<form action="HostBrowser.html" method="get">
<fieldset><legend>Administration Options</legend>
<div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px " type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
</fieldset>
</form>
#(/hosts)#

@ -47,7 +47,7 @@ function updatepage(str) {
html += "<tbody>"; html += "<tbody>";
for (var i = 0; i < firstChannel.items.length; i++) { for (var i = 0; i < firstChannel.items.length; i++) {
item = firstChannel.items[i]; item = firstChannel.items[i];
html += "<tr class=\"TableCellLight\"><td style=\"padding: 2px;text-align:left;\"><a href=\"HostBrowser.html?admin=#[admin]#&pathsearch=&amp;path=" + item.link + "\">" + item.link + "<\/a><\/td>"; html += "<tr class=\"TableCellLight\"><td style=\"padding: 2px;text-align:left;\"><a href=\"IndexBrowser_p.html?pathsearch=&amp;path=" + item.link + "\">" + item.link + "<\/a><\/td>";
} }
html += "</tbody><\/table>"; html += "</tbody><\/table>";
} }
@ -60,27 +60,12 @@ function updatepage(str) {
<link href="env/hypertree.css" rel="stylesheet"> <link href="env/hypertree.css" rel="stylesheet">
</head> </head>
<body id="IndexControl"> <body id="IndexControl">
#(topmenu)# #%env/templates/header.template%#
<div class="SubMenu">
<ul class="SubMenu">
<li style="width:15%;"><a class="MenuItemLink" href="index.html"><img src="env/grafics/navsl.gif" height="10px" style="padding-right:10px" align="baseline"/>back to start page</a></li>
</ul>
</div>
#%env/templates/embeddedheader.template%#
::
#%env/templates/simpleheader.template%#
<script type="text/javascript">
document.getElementById("header_hostbrowser").className += " active";
</script>
::
#%env/templates/header.template%#
#%env/templates/submenuWebStructure.template%# #%env/templates/submenuWebStructure.template%#
#(/topmenu)#
<h1>Index Browser</h1> <h1>Index Browser</h1>
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="HostBrowser.html?admin=#[admin]#&hosts=">all hosts</a>#(authorized)#::, <a href="HostBrowser.html?admin=#[admin]#&hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="HostBrowser.html?admin=#[admin]#&hosts=error">only with load errors</a>#(/authorized)#.</p> <p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="IndexBrowser_p.html?hosts=">all hosts</a>#(authorized)#::, <a href="IndexBrowser_p.html?hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="IndexBrowser_p.html?hosts=error">only with load errors</a>#(/authorized)#.</p>
<form action="HostBrowser.html" id="searchform" method="get" role="search"> <form action="IndexBrowser_p.html" id="searchform" method="get" role="search">
<fieldset class="yacys"> <fieldset class="yacys">
<input type="hidden" name="admin" id="admin" value="#[admin]#" />
<div class="input-group" style="width:600px;float:left;"> <div class="input-group" style="width:600px;float:left;">
<label for="search" class="input-group-addon">Host/URL</label> <label for="search" class="input-group-addon">Host/URL</label>
<input id="search" type="search" name="path" value="#[path]#" maxlength="250" class="form-control" onkeyup="xmlhttpPost(); return false;"/> <input id="search" type="search" name="path" value="#[path]#" maxlength="250" class="form-control" onkeyup="xmlhttpPost(); return false;"/>
@ -96,17 +81,17 @@ function updatepage(str) {
</fieldset> </fieldset>
</form> </form>
#[result]# #[result]#
#(hosts)#:: #(hosts)#::
<fieldset><legend>Host List</legend> <fieldset><legend>Host List</legend>
<ol style="list-style-type : none; padding-left: 0px"> <ol style="list-style-type : none; padding-left: 0px">
#{list}# #{list}#
<li style="float:left; padding:1px 5px 1px 5px;"> <li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8">&nbsp;<a href="HostBrowser.html?admin=#[admin]#&path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div> <div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8">&nbsp;<a href="IndexBrowser_p.html?path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div>
<div style="width:120px; text-align:right; float: left; white-space:nowrap; overflow:hidden;"><span class="commit" aria-label="#[count]# valid">#[count]#</span>#(crawler)#::/<span class="pending" aria-label="#[pending]# pending">#[pending]#</span>#(/crawler)##(errors)#::/<span class="info" aria-label="#[exclcount]# excluded">#[exclcount]#</span>/<span class="error" aria-label="#[failcount]# failed">#[failcount]#</span>#(/errors)# URLs</div> <div style="width:120px; text-align:right; float: left; white-space:nowrap; overflow:hidden;"><span class="commit" aria-label="#[count]# valid">#[count]#</span>#(crawler)#::/<span class="pending" aria-label="#[pending]# pending">#[pending]#</span>#(/crawler)##(errors)#::/<span class="info" aria-label="#[exclcount]# excluded">#[exclcount]#</span>/<span class="error" aria-label="#[failcount]# failed">#[failcount]#</span>#(/errors)# URLs</div>
</li> </li>
#{/list}# #{/list}#
</ol> </ol>
<div style="clear:both; float:left; padding:10px 5px 1px 5px;"> <div style="clear:both; float:left; padding:10px 5px 1px 5px;">
<span>Count Colors:</span> <span>Count Colors:</span>
<span class="commit">&nbsp;&nbsp;&nbsp;Documents without Errors</span> <span class="commit">&nbsp;&nbsp;&nbsp;Documents without Errors</span>
@ -117,7 +102,7 @@ function updatepage(str) {
#(/authorized)# #(/authorized)#
</div> </div>
</fieldset> </fieldset>
<link rel="stylesheet" href="env/morris.css"> <link rel="stylesheet" href="env/morris.css">
<script src="js/raphael.min.js"></script> <script src="js/raphael.min.js"></script>
<script src="js/morris.js"></script> <script src="js/morris.js"></script>
@ -125,100 +110,100 @@ function updatepage(str) {
<div id="graph" style="height:200px"></div> <div id="graph" style="height:200px"></div>
<script> <script>
var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) { var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) {
dates_in_content_dts = data.facet_counts.facet_fields.dates_in_content_dts; dates_in_content_dts = data.facet_counts.facet_fields.dates_in_content_dts;
var parsed = []; var parsed = [];
for (var i = 0; i < dates_in_content_dts.length; i = i + 2) { for (var i = 0; i < dates_in_content_dts.length; i = i + 2) {
var date = dates_in_content_dts[i]; var date = dates_in_content_dts[i];
var count = dates_in_content_dts[i + 1]; var count = dates_in_content_dts[i + 1];
if (date && count) {parsed[parsed.length] = {x: date,y: count};}; if (date && count) {parsed[parsed.length] = {x: date,y: count};};
}; };
if (parsed.length > 0) { if (parsed.length > 0) {
var histogram = Morris.Bar({ var histogram = Morris.Bar({
element: 'graph', element: 'graph',
data: parsed, data: parsed,
xkey: 'x', xkey: 'x',
ykeys: ['y'], ykeys: ['y'],
labels: ['number of documents about this date'], labels: ['number of documents about this date'],
yLabelFormat: function (y) { return y.toString() + ' docs'; }, yLabelFormat: function (y) { return y.toString() + ' docs'; },
barColors: function (row, series, type) { barColors: function (row, series, type) {
var d = new Date(row.label); var d = new Date(row.label);
if (d.getDay() === 6) return '#4aaf46'; //saturday if (d.getDay() === 6) return '#4aaf46'; //saturday
if (d.getDay() === 0) return '#4aaf46'; //sunday if (d.getDay() === 0) return '#4aaf46'; //sunday
return '#3574c0'; return '#3574c0';
}, },
hideHover: 'false' hideHover: 'false'
}).on('click', function(i, row) { }).on('click', function(i, row) {
console.log(i, row); console.log(i, row);
}); });
/* Add keyboard navigation support and accessible attributes */ /* Add keyboard navigation support and accessible attributes */
makeAccessibleMorrisBar(histogram, makeAccessibleMorrisBar(histogram,
"Number of documents per date histogram", "Number of documents per date histogram",
function(data) {return data.x + " : " + data.y + " docs"}); function(data) {return data.x + " : " + data.y + " docs"});
} }
}); });
</script> </script>
#(/hosts)# #(/hosts)#
#(hostanalysis)#:: #(hostanalysis)#::
<fieldset><legend>Host Analysis</legend> <fieldset><legend>Host Analysis</legend>
#{facets}# #{facets}#
<table class="sortable" style="float:left; border-width: 0"> <table class="sortable" style="float:left; border-width: 0">
<tr class="TableCellDark"> <tr class="TableCellDark">
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[facetname]#</td> <td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[facetname]#</td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok">#</td> <td style="text-align:right;white-space:nowrap" colspan="5" class="listingok">#</td>
</tr> </tr>
#{facet}# #{facet}#
<tr class="TableCellLight"> <tr class="TableCellLight">
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[key]#</td> <td style="text-align:left;white-space:nowrap" colspan="5" class="pending">#[key]#</td>
<td style="text-align:right;white-space:nowrap" colspan="5" class="listingok"><a href="#[a]#" target="_blank" class="forceNoExternalIcon">#[count]#</a></td> <td style="text-align:right;white-space:nowrap" colspan="5" class="listingok"><a href="#[a]#" target="_blank" class="forceNoExternalIcon">#[count]#</a></td>
</tr> </tr>
#{/facet}# #{/facet}#
</table>&nbsp;&nbsp; </table>&nbsp;&nbsp;
#{/facets}# #{/facets}#
</fieldset> </fieldset>
#(/hostanalysis)# #(/hostanalysis)#
#(files)#:: #(files)#::
<fieldset><legend>Browser for <a href="#[path]#" target="_blank">#[path]#</a></legend> <fieldset><legend>Browser for <a href="#[path]#" target="_blank">#[path]#</a></legend>
<p>documents stored for host: #[hostsize]#; documents stored for subpath: #[subpathloadsize]#; unloaded documents detected in subpath: #[subpathdetectedsize]# <!-- #(complete)#;<a href="HostBrowser.html?admin=#[admin]#&complete=true&path=#[path]#">get complete list</a>::<a href="HostBrowser.html?admin=#[admin]#&path=#[path]#">directory view</a>#(/complete)#--> <p>documents stored for host: #[hostsize]#; documents stored for subpath: #[subpathloadsize]#; unloaded documents detected in subpath: #[subpathdetectedsize]# <!-- #(complete)#;<a href="IndexBrowser_p.html?complete=true&path=#[path]#">get complete list</a>::<a href="IndexBrowser_p.html?path=#[path]#">directory view</a>#(/complete)#-->
</p> </p>
<table class="sortable" style="float:left; border-width: 0"> <table class="sortable" style="float:left; border-width: 0">
<thead> <thead>
<tr> <tr>
<th style="text-align:center; width:32px"></th> <th style="text-align:center; width:32px"></th>
<th style="text-align:left; width: 600px" class="listing">Path</th> <th style="text-align:left; width: 600px" class="listing">Path</th>
<th style="text-align:right; padding:2px;" class="listingem">stored</th> <th style="text-align:right; padding:2px;" class="listingem">stored</th>
<th style="text-align:right; padding:2px;" class="listingem">linked</th> <th style="text-align:right; padding:2px;" class="listingem">linked</th>
<th style="text-align:right; padding:2px;" class="listingem">pending</th> <th style="text-align:right; padding:2px;" class="listingem">pending</th>
<th style="text-align:right; padding:2px;" class="listingem">excluded</th> <th style="text-align:right; padding:2px;" class="listingem">excluded</th>
<th style="text-align:right; padding:2px;" class="listingem">failed</th> <th style="text-align:right; padding:2px;" class="listingem">failed</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
#(root)# #(root)#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#"> <tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"></td> <td style="text-align:center"></td>
<td style="text-align:left;white-space:nowrap"><a href="HostBrowser.html?admin=#[admin]#&path=#[path]#" class="listing">..</a></td> <td style="text-align:left;white-space:nowrap"><a href="IndexBrowser_p.html?path=#[path]#" class="listing">..</a></td>
<td style="text-align:right;white-space:nowrap" colspan="5"></td> <td style="text-align:right;white-space:nowrap" colspan="5"></td>
</tr>:: </tr>::
#(/root)# #(/root)#
#{list}# #{list}#
#(type)#<!--file--> #(type)#<!--file-->
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#"> <tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"><div><a href="ViewFile.html?url=#[url]#"><img src="env/grafics/doc.gif" alt=""/></a><span>Show Metadata</span></div></td> <td style="text-align:center"><div><a href="ViewFile.html?url=#[url]#"><img src="env/grafics/doc.gif" alt=""/><span>Metadata</span></a></div></td>
<td style="text-align:left;white-space:nowrap" class=#(stored)#"listingem"::"listing"#(/stored)#><a href="#[url]#" target="_blank">#[url]#</a></td> <td style="text-align:left;white-space:nowrap" class=#(stored)#"listingem"::"listing"#(/stored)#><a href="#[url]#" target="_blank">#[url]#</a></td>
#(stored)# #(stored)#
#(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="HostBrowser.html?admin=#[admin]#&load=#[url]#&path=#[path]#">load &amp; index</a>#(/load)#</td>:: #(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="IndexBrowser_p.html?load=#[url]#&path=#[path]#">load &amp; index</a>#(/load)#</td>::
<td style="text-align:left;white-space:nowrap" colspan="1" class="listingok">indexed</td><td style="text-align:left;white-space:nowrap" colspan="4" class="listingem">#[comment]#</td>:: <td style="text-align:left;white-space:nowrap" colspan="1" class="listingok">indexed</td><td style="text-align:left;white-space:nowrap" colspan="4" class="listingem">#[comment]#</td>::
<td style="text-align:left;white-space:nowrap" colspan="5" class="pending">loading</td>:: <td style="text-align:left;white-space:nowrap" colspan="5" class="pending">loading</td>::
<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok">#[error]#</td> <td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok">#[error]#</td>
#(/stored)# #(/stored)#
</tr>::<!--folder--> </tr>::<!--folder-->
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#"> <tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td style="text-align:center"><img src="env/grafics/dir.gif" alt="Directory"/></td> <td style="text-align:center"><img src="env/grafics/dir.gif" alt="Directory"/></td>
<td style="text-align:left;white-space:nowrap" class="listing"><a href="HostBrowser.html?admin=#[admin]#&path=#[url]#" class="listing">#[url]#</a></td> <td style="text-align:left;white-space:nowrap" class="listing"><a href="IndexBrowser_p.html?path=#[url]#" class="listing">#[url]#</a></td>
<td style="text-align:right" class="commit">#[stored]#</td> <td style="text-align:right" class="commit">#[stored]#</td>
<td style="text-align:right" class="listing">#[linked]#</td> <td style="text-align:right" class="listing">#[linked]#</td>
<td style="text-align:right" #(pendingVisible)#class="listingem"::class="pending"#(/pendingVisible)#>#[pending]#</td> <td style="text-align:right" #(pendingVisible)#class="listingem"::class="pending"#(/pendingVisible)#>#[pending]#</td>
@ -231,7 +216,7 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
</table> </table>
</fieldset> </fieldset>
#(linkgraph)#<div style="text-align:center"><form><input name="showlinkstructure" onClick="location.href = location.toString() + '&showlinkstructure=';" class="btn btn-default btn-xs" value="show link structure graph"/></form></div>:: #(linkgraph)#<div style="text-align:center"><form><input name="showlinkstructure" onClick="location.href = location.toString() + '&showlinkstructure=';" class="btn btn-default btn-xs" value="show link graph"/></form></div>::
<script src="js/d3.v5.min.js"></script> <script src="js/d3.v5.min.js"></script>
<script src="js/hypertree.js"></script> <script src="js/hypertree.js"></script>
<div id="linkstructure"></div> <div id="linkstructure"></div>
@ -246,38 +231,38 @@ var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=
<ol style="list-style-type : none; padding-left: 0px"> <ol style="list-style-type : none; padding-left: 0px">
#{list}# #{list}#
<li style="float:left; padding:1px 5px 1px 5px;"> <li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="HostBrowser.html?admin=#[admin]#&path=#[link]#">#[host]#</a></div></div> <div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="IndexBrowser_p.html?path=#[link]#">#[host]#</a></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div> <div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</li> </li>
#{/list}# #{/list}#
</ol> </ol>
#(admin)#::
<p style="clear:both"><br/> <p style="clear:both"><br/>
<img src="WebStructurePicture_p.png?host=#[host]#&depth=3&width=1024&height=576&nodes=200&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333"> <img src="WebStructurePicture_p.png?host=#[host]#&depth=3&width=1024&height=576&nodes=200&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333">
</p> </p>
#(/admin)#
</fieldset> </fieldset>
#(/outbound)# #(/outbound)#
#(inbound)#:: #(inbound)#::
<fieldset><legend>Inbound Links, incoming to #[host]# - Host List</legend> <fieldset><legend>Inbound Links, incoming to #[host]# - Host List</legend>
<ol style="list-style-type : none; padding-left: 0px"> <ol style="list-style-type : none; padding-left: 0px">
#{list}# #{list}#
<li style="float:left; padding:1px 5px 1px 5px;"> <li style="float:left; padding:1px 5px 1px 5px;">
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="HostBrowser.html?admin=#[admin]#&path=#[host]#">#[host]#</a></div></div> <div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><a href="IndexBrowser_p.html?path=#[host]#">#[host]#</a></div></div>
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div> <div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]# URLs</div>
</li> </li>
#{/list}# #{/list}#
</ol> </ol>
</fieldset> </fieldset>
#(/inbound)# #(/inbound)#
#(authorized)#:: #(hosts)#::
#(admin)#:: <form action="IndexBrowser_p.html" method="get">
#%HostBrowserAdmin_p.html%# <fieldset><legend>Administration Options</legend>
#(/admin)# <div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px " type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
#(/authorized)# </fieldset>
</form>
#(/hosts)#
#%env/templates/footer.template%# #%env/templates/footer.template%#
</body> </body>
</html> </html>

@ -1,5 +1,5 @@
/** /**
* HostBrowser * IndexBrowser
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany * Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 27.09.2012 at http://yacy.net * First released 27.09.2012 at http://yacy.net
* *
@ -71,14 +71,14 @@ import net.yacy.server.serverSwitch;
/** /**
* Browser for indexed resources * Browser for indexed resources
*/ */
public class HostBrowser { public class IndexBrowser_p {
final static long TIMEOUT = 10000L; final static long TIMEOUT = 10000L;
public static enum StoreType { public static enum StoreType {
LINK, INDEX, EXCLUDED, FAILED, RELOAD; LINK, INDEX, EXCLUDED, FAILED, RELOAD;
} }
/** /**
* <p>Retrieve local index entries for a path, or for hosts with the most references. Also allow some maintaining operations on entries with load errors.</p> * <p>Retrieve local index entries for a path, or for hosts with the most references. Also allow some maintaining operations on entries with load errors.</p>
* <p>Some parameters need administrator authentication or unauthenticated local host requests to be allowed : load, deleteLoadErrors, delete, reload404, * <p>Some parameters need administrator authentication or unauthenticated local host requests to be allowed : load, deleteLoadErrors, delete, reload404,
@ -87,36 +87,34 @@ public class HostBrowser {
* <p> * <p>
* Configuration settings : * Configuration settings :
* <ul> * <ul>
* <li>browser.autoload : allow the administrator to stack URLs to the local crawl queue, manually with the "load" parameter, * <li>browser.autoload : allow the administrator to stack URLs to the local crawl queue, manually with the "load" parameter,
* or automatically when the "path" parameter is filled with an unknown URL</li> * or automatically when the "path" parameter is filled with an unknown URL</li>
* <li>browser.load4everyone : allow everyone to stack URLs to the local crawl queue. * <li>browser.load4everyone : allow everyone to stack URLs to the local crawl queue.
* "browser.autoload" has also to be set to true to enable automatic loading on an unknown path</li> * "browser.autoload" has also to be set to true to enable automatic loading on an unknown path</li>
* <li>publicSearchpage : set to false to restrict use of this servlet to authenticated administrator only</li> * <li>publicSearchpage : set to false to restrict use of this servlet to authenticated administrator only</li>
* <li>publicTopmenu : set to false to hide the top navigation bar to non authenticated users</li>
* <li>decoration.hostanalysis : add supplementary hosts information for debug/analysis purpose</li> * <li>decoration.hostanalysis : add supplementary hosts information for debug/analysis purpose</li>
* <li>decoration.grafics.linkstructure : display a link structure graph when the path parameter is filled</li> * <li>decoration.grafics.linkstructure : display a link structure graph when the path parameter is filled</li>
* </ul> * </ul>
* </p> * </p>
* @param header servlet request header * @param header servlet request header
* @param post request parameters. Supported keys :<ul> * @param post request parameters. Supported keys :<ul>
* <li>admin : when "true", display in the html page render the administration context (menu and top navbar)</li> * <li>path : root URL or host name to browse (ignored when the hosts parameter is filled). When not yet locally indexed, this URL can be automatically crawled and indexed
* <li>path : root URL or host name to browse (ignored when the hosts parameter is filled). When not yet locally indexed, this URL can be automatically crawled and indexed * when "browser.autoload" or "browser.load4everyone" configuration settings are set to true.</li>
* when "browser.autoload" or "browser.load4everyone" configuration settings are set to true.</li> * <li>load : URL to crawl and index.</li>
* <li>load : URL to crawl and index.</li> * <li>deleteLoadErrors : delete from the local index documents with load error (HTTP status different from 200 or any other failure).</li>
* <li>deleteLoadErrors : delete from the local index documents with load error (HTTP status different from 200 or any other failure).</li> * <li>hosts : generate hosts with most references list. Supported values :
* <li>hosts : generate hosts with most references list. Supported values : * <ul>
* <ul> * <li>"crawling" : restrict to hosts currently being crawled</li>
* <li>"crawling" : restrict to hosts currently being crawled</li> * <li>"error" : restrict to hosts having at least one resource load error</li>
* <li>"error" : restrict to hosts having at least one resource load error</li> * </ul>
* </ul> * </li>
* </li> * <li>delete : delete from the index whole documents tree matching the path prefix</li>
* <li>delete : delete from the index whole documents tree matching the path prefix</li> * <li>reload404 : reload documents matching the path prefix and which previously failed to load due to a network error</li>
* <li>reload404 : reload documents matching the path prefix and which previously failed to load due to a network error</li> * <li>facetcount : </li>
* <li>facetcount : </li> * <li>complete : we want only root paths for complete lists</li>
* <li>complete : we want only root paths for complete lists</li> * <li>nepr :</li>
* <li>nepr :</li> * <li>showlinkstructure : when present, display a link graph for path</li>
* <li>showlinkstructure : when present, display a link graph for path</li> * </ul>
* </ul>
* @param env server environment * @param env server environment
* @return the servlet answer object * @return the servlet answer object
*/ */
@ -125,53 +123,23 @@ public class HostBrowser {
// return variable that accumulates replacements // return variable that accumulates replacements
final Switchboard sb = (Switchboard) env; final Switchboard sb = (Switchboard) env;
Fulltext fulltext = sb.index.fulltext(); Fulltext fulltext = sb.index.fulltext();
final boolean authorized = sb.verifyAuthentication(header); final boolean autoload = sb.getConfigBool("browser.autoload", true);
final boolean autoload = authorized && sb.getConfigBool("browser.autoload", true);
final boolean load4everyone = sb.getConfigBool("browser.load4everyone", false); final boolean load4everyone = sb.getConfigBool("browser.load4everyone", false);
final boolean loadRight = autoload || load4everyone; // add config later final boolean loadRight = autoload || load4everyone; // add config later
final boolean searchAllowed = sb.getConfigBool(SwitchboardConstants.PUBLIC_SEARCHPAGE, true) || authorized;
final serverObjects prop = new serverObjects(); final serverObjects prop = new serverObjects();
// set default values // set default values
prop.put("path", ""); prop.put("path", "");
prop.put("result", ""); prop.put("result", "");
prop.put("hosts", 0); prop.put("hosts", 0);
prop.put("files", 0); prop.put("files", 0);
prop.put("hostanalysis", 0); prop.put("hostanalysis", 0);
prop.put("admin", "false");
boolean admin = false;
String referer = header.get("Referer", ""); String referer = header.get("Referer", "");
if ((post != null && post.getBoolean("admin")) || referer.contains("HostBrowser.html?admin=true")) {
prop.put("topmenu", 2);
prop.put("admin", "true");
admin = true;
} else if (authorized) { // show top nav to admins
prop.put("topmenu", 1);
} else { // for other respect setting in Search Design Configuration
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
}
final String promoteSearchPageGreeting =
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ?
env.getConfig("network.unit.description", "") :
env.getConfig(SwitchboardConstants.GREETING, "");
prop.put("topmenu_promoteSearchPageGreeting", promoteSearchPageGreeting);
if (!searchAllowed) {
prop.put("result", "You are not allowed to use this page. Please ask an administrator for permission.");
prop.putNum("ucount", 0);
return prop;
}
if(authorized) {
/* Fill the "admin" parameter for authorized links */
prop.put("authorized_admin", Boolean.toString(admin));
}
String path = post == null ? "" : post.get("path", "").trim(); String path = post == null ? "" : post.get("path", "").trim();
if (authorized) sb.index.fulltext().commit(true); sb.index.fulltext().commit(true);
if (post == null || env == null) { if (post == null || env == null) {
prop.putNum("ucount", fulltext.collectionSize()); prop.putNum("ucount", fulltext.collectionSize());
return prop; return prop;
@ -186,8 +154,8 @@ public class HostBrowser {
!path.startsWith("smb://") && !path.startsWith("smb://") &&
!path.startsWith("file://"))) { path = "http://" + path; } !path.startsWith("file://"))) { path = "http://" + path; }
prop.putHTML("path", path); prop.putHTML("path", path);
prop.put("delete", authorized && path.length() > 0 ? 1 : 0); prop.put("delete", path.length() > 0 ? 1 : 0);
DigestURL pathURI = null; DigestURL pathURI = null;
try {pathURI = new DigestURL(path);} catch (final MalformedURLException e) {} try {pathURI = new DigestURL(path);} catch (final MalformedURLException e) {}
@ -231,61 +199,60 @@ public class HostBrowser {
} }
} }
if (authorized && post.containsKey("deleteLoadErrors")) { if (post.containsKey("deleteLoadErrors")) {
try { try {
fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND " fulltext.getDefaultConnector().deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
+ CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists + CollectionSchema.httpstatus_i.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
ConcurrentLog.info ("HostBrowser:", "delete documents with httpstatus_i <> 200"); ConcurrentLog.info ("IndexBrowser_p:", "delete documents with httpstatus_i <> 200");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" ); fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = fail"); ConcurrentLog.info ("IndexBrowser_p:", "delete documents with failtype_s = fail");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"" ); fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"" );
ConcurrentLog.info ("HostBrowser:", "delete documents with failtype_s = excl"); ConcurrentLog.info ("IndexBrowser_p:", "delete documents with failtype_s = excl");
prop.putNum("ucount", fulltext.collectionSize()); prop.putNum("ucount", fulltext.collectionSize());
return prop; return prop;
} catch (final IOException ex) { } catch (final IOException ex) {
ConcurrentLog.logException(ex); ConcurrentLog.logException(ex);
} }
} }
if (post.containsKey("hosts")) { if (post.containsKey("hosts")) {
// generate host list // generate host list
try { try {
boolean onlyCrawling = "crawling".equals(post.get("hosts", "")); boolean onlyCrawling = "crawling".equals(post.get("hosts", ""));
boolean onlyErrors = "error".equals(post.get("hosts", "")); boolean onlyErrors = "error".equals(post.get("hosts", ""));
int maxcount = authorized ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/columns int maxcount = 2 * 3 * 2 * 5 * 7 * 2 * 3; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/columns
// collect hosts from index // collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName()); ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", maxcount, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>(true); if (hostscore == null) hostscore = new ClusteredScoreMap<String>(true);
// collect hosts from crawler // collect hosts from crawler
final Map<String, Integer[]> crawler = (authorized) ? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots) : new HashMap<String, Integer[]>(); final Map<String, Integer[]> crawler = sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots);
final Map<String, Integer> hostNameToPendingCount = new HashMap<>(); final Map<String, Integer> hostNameToPendingCount = new HashMap<>();
for(Entry<String, Integer[]>crawlerEntry: crawler.entrySet()) { for(Entry<String, Integer[]>crawlerEntry: crawler.entrySet()) {
/* The local stack returns keys composed of "hostname:port" : we now sum pending URLs counts by host name */ /* The local stack returns keys composed of "hostname:port" : we now sum pending URLs counts by host name */
String hostName = Domains.stripToHostName(crawlerEntry.getKey()); String hostName = Domains.stripToHostName(crawlerEntry.getKey());
Integer pendingCount = hostNameToPendingCount.get(hostName); Integer pendingCount = hostNameToPendingCount.get(hostName);
if(pendingCount == null) { if(pendingCount == null) {
pendingCount = 0; pendingCount = 0;
} }
pendingCount += crawlerEntry.getValue()[0]; pendingCount += crawlerEntry.getValue()[0];
hostNameToPendingCount.put(hostName, pendingCount); hostNameToPendingCount.put(hostName, pendingCount);
} }
// collect the errorurls // collect the errorurls
Map<String, ReversibleScoreMap<String>> exclfacets = authorized ? fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(), maxcount, CollectionSchema.host_s.getSolrFieldName()) : null; Map<String, ReversibleScoreMap<String>> exclfacets = fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(), maxcount, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> exclscore = exclfacets == null ? new ClusteredScoreMap<String>(true) : exclfacets.get(CollectionSchema.host_s.getSolrFieldName()); ReversibleScoreMap<String> exclscore = exclfacets == null ? new ClusteredScoreMap<String>(true) : exclfacets.get(CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> failfacets = authorized ? fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(), maxcount, CollectionSchema.host_s.getSolrFieldName()) : null; Map<String, ReversibleScoreMap<String>> failfacets = fulltext.getDefaultConnector().getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(), maxcount, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> failscore = failfacets == null ? new ClusteredScoreMap<String>(true) : failfacets.get(CollectionSchema.host_s.getSolrFieldName()); ReversibleScoreMap<String> failscore = failfacets == null ? new ClusteredScoreMap<String>(true) : failfacets.get(CollectionSchema.host_s.getSolrFieldName());
int c = 0; int c = 0;
Iterator<String> i = hostscore.keys(false); Iterator<String> i = hostscore.keys(false);
String host; String host;
while (i.hasNext() && c < maxcount) { while (i.hasNext() && c < maxcount) {
host = i.next(); host = i.next();
prop.put("hosts_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("hosts_list_" + c + "_host", host); prop.putHTML("hosts_list_" + c + "_host", host);
boolean inCrawler = hostNameToPendingCount.containsKey(host); boolean inCrawler = hostNameToPendingCount.containsKey(host);
int exclcount = exclscore.get(host); int exclcount = exclscore.get(host);
@ -294,7 +261,7 @@ public class HostBrowser {
prop.put("hosts_list_" + c + "_count", hostscore.get(host)); prop.put("hosts_list_" + c + "_count", hostscore.get(host));
prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0); prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0);
if (inCrawler) { if (inCrawler) {
prop.put("hosts_list_" + c + "_crawler_pending", hostNameToPendingCount.get(host)); prop.put("hosts_list_" + c + "_crawler_pending", hostNameToPendingCount.get(host));
} }
prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0); prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0);
if (errors > 0) { if (errors > 0) {
@ -311,18 +278,18 @@ public class HostBrowser {
} }
} }
prop.put("hosts_list", c); prop.put("hosts_list", c);
prop.put("hosts_authorized", authorized ? 1 : 0); prop.put("hosts_authorized", 1);
prop.put("hosts", 1); prop.put("hosts", 1);
} catch (final IOException e) { } catch (final IOException e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
} }
if (path.length() > 0) { if (path.length() > 0) {
try { try {
DigestURL uri = new DigestURL(path); DigestURL uri = new DigestURL(path);
String host = uri.getHost(); String host = uri.getHost();
// write host analysis if path after host is empty // write host analysis if path after host is empty
if (uri.getPath().length() <= 1 && host != null && host.length() > 0 && sb.getConfigBool("decoration.hostanalysis", false)) { if (uri.getPath().length() <= 1 && host != null && host.length() > 0 && sb.getConfigBool("decoration.hostanalysis", false)) {
//how many documents per crawldepth_i; get crawldepth_i facet for host //how many documents per crawldepth_i; get crawldepth_i facet for host
@ -364,27 +331,25 @@ public class HostBrowser {
prop.put("hostanalysis_facets", fc); prop.put("hostanalysis_facets", fc);
prop.put("hostanalysis", 1); prop.put("hostanalysis", 1);
} }
// write file list for subpath // write file list for subpath
boolean delete = false; boolean delete = false;
boolean reload404 = false; boolean reload404 = false;
if (authorized && post.containsKey("delete")) { if (post.containsKey("delete")) {
// delete the complete path!! That includes everything that matches with this prefix. // delete the complete path!! That includes everything that matches with this prefix.
delete = true; delete = true;
} }
if (authorized && post.containsKey("reload404")) { if (post.containsKey("reload404")) {
// try to re-load all urls that have load errors and matches with this prefix. // try to re-load all urls that have load errors and matches with this prefix.
reload404 = true; reload404 = true;
} }
int facetcount=post.getInt("facetcount", 0); int facetcount = post.getInt("facetcount", 0);
boolean complete = post.getBoolean("complete"); boolean complete = post.getBoolean("complete");
if (complete) { // we want only root paths for complete lists if (complete) { // we want only root paths for complete lists
p = path.indexOf('/', 10); p = path.indexOf('/', 10);
if (p > 0) path = path.substring(0, p + 1); if (p > 0) path = path.substring(0, p + 1);
} }
prop.put("files_complete", complete ? 1 : 0); prop.put("files_complete", complete ? 1 : 0);
prop.put("files_complete_admin", admin ? "true" : "false");
prop.putHTML("files_complete_path", path); prop.putHTML("files_complete_path", path);
p = path.substring(0, path.length() - 1).lastIndexOf('/'); p = path.substring(0, path.length() - 1).lastIndexOf('/');
if (p < 8) { if (p < 8) {
@ -392,15 +357,13 @@ public class HostBrowser {
} else { } else {
prop.put("files_root", 0); prop.put("files_root", 0);
prop.putHTML("files_root_path", path.substring(0, p + 1)); prop.putHTML("files_root_path", path.substring(0, p + 1));
prop.put("files_root_admin", admin ? "true" : "false");
} }
// generate file list from path // generate file list from path
prop.putHTML("outbound_host", host); prop.putHTML("outbound_host", host);
if (authorized) prop.putHTML("outbound_admin_host", host); //used for WebStructurePicture_p link
prop.putHTML("inbound_host", host); prop.putHTML("inbound_host", host);
String hosthash = uri.hosthash(); String hosthash = uri.hosthash();
String[] pathparts = uri.getPaths(); String[] pathparts = uri.getPaths();
// get all files for a specific host from the index // get all files for a specific host from the index
StringBuilder q = new StringBuilder(); StringBuilder q = new StringBuilder();
if (host == null) { if (host == null) {
@ -438,7 +401,7 @@ public class HostBrowser {
CollectionSchema.references_external_i.getSolrFieldName(), CollectionSchema.references_external_i.getSolrFieldName(),
CollectionSchema.references_exthosts_i.getSolrFieldName(), CollectionSchema.references_exthosts_i.getSolrFieldName(),
CollectionSchema.cr_host_chance_d.getSolrFieldName(), CollectionSchema.cr_host_chance_d.getSolrFieldName(),
CollectionSchema.cr_host_norm_i.getSolrFieldName() CollectionSchema.cr_host_norm_i.getSolrFieldName()
)); ));
solrQueryTask.start(); solrQueryTask.start();
Set<String> storedDocs = new HashSet<String>(); Set<String> storedDocs = new HashSet<String>();
@ -455,68 +418,68 @@ public class HostBrowser {
long timeoutReferences = System.currentTimeMillis() + 6000; long timeoutReferences = System.currentTimeMillis() + 6000;
ReferenceReportCache rrCache = sb.index.getReferenceReportCache(); ReferenceReportCache rrCache = sb.index.getReferenceReportCache();
try { try {
SolrDocument doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS); SolrDocument doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
while (doc != AbstractSolrConnector.POISON_DOCUMENT && doc != null) { while (doc != AbstractSolrConnector.POISON_DOCUMENT && doc != null) {
String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()); String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
FailType error = errortype == null ? null : FailType.valueOf(errortype); FailType error = errortype == null ? null : FailType.valueOf(errortype);
String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()); String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids, System.currentTimeMillis() < timeoutReferences)); infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids, System.currentTimeMillis() < timeoutReferences));
if (u.startsWith(path)) { if (u.startsWith(path)) {
if (delete) { if (delete) {
deleteIDs.add(ids); deleteIDs.add(ids);
} else { } else {
if (error == null) storedDocs.add(u); else { if (error == null) storedDocs.add(u); else {
if (reload404 && error == FailType.fail) { if (reload404 && error == FailType.fail) {
ArrayList<String> collections = (ArrayList<String>) doc.getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName()); ArrayList<String> collections = (ArrayList<String>) doc.getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName());
if (collections != null) reloadURLCollection.addAll(collections); if (collections != null) reloadURLCollection.addAll(collections);
reloadURLs.add(u); reloadURLs.add(u);
} }
if (authorized) errorDocs.put(u, error); errorDocs.put(u, error);
} }
} }
} else if (complete) { } else if (complete) {
if (error == null) storedDocs.add(u); else { if (error == null) storedDocs.add(u); else {
if (authorized) errorDocs.put(u, error); errorDocs.put(u, error);
} }
} }
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); // add the current link
if (error == null) { if (error == null) {
hostsize++; hostsize++;
// collect inboundlinks to browse the host // collect inboundlinks to browse the host
Iterator<String> links = URIMetadataNode.getLinks(doc, true); Iterator<String> links = URIMetadataNode.getLinks(doc, true);
while (links.hasNext()) { while (links.hasNext()) {
u = links.next(); u = links.next();
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u); if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u);
} }
// collect referrer links // collect referrer links
links = URIMetadataNode.getLinks(doc, false); links = URIMetadataNode.getLinks(doc, false);
while (links.hasNext()) { while (links.hasNext()) {
u = links.next(); u = links.next();
try { try {
MultiProtocolURL mu = new MultiProtocolURL(u); MultiProtocolURL mu = new MultiProtocolURL(u);
if (mu.getHost() != null) { if (mu.getHost() != null) {
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost()); ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
if (lks == null) { if (lks == null) {
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator); lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
outboundHosts.put(mu.getHost(), lks); outboundHosts.put(mu.getHost(), lks);
} }
lks.set(u, u.length()); lks.set(u, u.length());
} }
} catch (final MalformedURLException e) {} } catch (final MalformedURLException e) {}
} }
} }
remainingTime = timeoutList - System.currentTimeMillis(); remainingTime = timeoutList - System.currentTimeMillis();
if (remainingTime <= 0) { if (remainingTime <= 0) {
break; break;
} }
doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS); doc = docs.poll(remainingTime, TimeUnit.MILLISECONDS);
} }
} finally { } finally {
/* Ensure termination and proper resources release of the query thread */ /* Ensure termination and proper resources release of the query thread */
solrQueryTask.interrupt(); solrQueryTask.interrupt();
} }
if (deleteIDs.size() > 0) sb.remove(deleteIDs); if (deleteIDs.size() > 0) sb.remove(deleteIDs);
if (reloadURLs.size() > 0) { if (reloadURLs.size() > 0) {
@ -524,19 +487,19 @@ public class HostBrowser {
for (String collection: reloadURLCollection) cm.put(collection, QueryParams.catchall_pattern); for (String collection: reloadURLCollection) cm.put(collection, QueryParams.catchall_pattern);
sb.reload(reloadURLs, cm.size() > 0 ? cm : CrawlProfile.collectionParser("user"), false); sb.reload(reloadURLs, cm.size() > 0 ? cm : CrawlProfile.collectionParser("user"), false);
} }
// collect from crawler // collect from crawler
List<Request> domainStackReferences = (authorized) ? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000) : new ArrayList<Request>(0); List<Request> domainStackReferences = sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000);
Set<String> loadingLinks = new HashSet<String>(); Set<String> loadingLinks = new HashSet<String>();
for (Request crawlEntry: domainStackReferences) loadingLinks.add(crawlEntry.url().toNormalform(true)); for (Request crawlEntry: domainStackReferences) loadingLinks.add(crawlEntry.url().toNormalform(true));
// now combine all lists into one // now combine all lists into one
Map<String, StoreType> files = new HashMap<String, StoreType>(); Map<String, StoreType> files = new HashMap<String, StoreType>();
for (String u: storedDocs) files.put(u, StoreType.INDEX); for (String u: storedDocs) files.put(u, StoreType.INDEX);
for (Map.Entry<String, FailType> e: errorDocs.entrySet()) files.put(e.getKey(), e.getValue() == FailType.fail ? StoreType.FAILED : StoreType.EXCLUDED); for (Map.Entry<String, FailType> e: errorDocs.entrySet()) files.put(e.getKey(), e.getValue() == FailType.fail ? StoreType.FAILED : StoreType.EXCLUDED);
for (String u: inboundLinks) if (!files.containsKey(u)) files.put(u, StoreType.LINK); for (String u: inboundLinks) if (!files.containsKey(u)) files.put(u, StoreType.LINK);
for (String u: loadingLinks) if (u.startsWith(path) && !files.containsKey(u)) files.put(u, StoreType.LINK); for (String u: loadingLinks) if (u.startsWith(path) && !files.containsKey(u)) files.put(u, StoreType.LINK);
ConcurrentLog.info("HostBrowser", "collected " + files.size() + " urls for path " + path); ConcurrentLog.info("IndexBrowser_p", "collected " + files.size() + " urls for path " + path);
// distinguish files and folders // distinguish files and folders
Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if the object is a StoreType, it's a file; if it's an int[], then it's a folder Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if the object is a StoreType, it's a file; if it's an int[], then it's a folder
@ -575,7 +538,7 @@ public class HostBrowser {
} }
} }
} }
int maxcount = 1000; int maxcount = 1000;
int c = 0; int c = 0;
// first list only folders // first list only folders
@ -587,7 +550,6 @@ public class HostBrowser {
// this is a folder // this is a folder
prop.put("files_list_" + c + "_type", 1); prop.put("files_list_" + c + "_type", 1);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey()); prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
int linked = ((int[]) entry.getValue())[0]; int linked = ((int[]) entry.getValue())[0];
int stored = ((int[]) entry.getValue())[1]; int stored = ((int[]) entry.getValue())[1];
int crawler = ((int[]) entry.getValue())[2]; int crawler = ((int[]) entry.getValue())[2];
@ -610,7 +572,6 @@ public class HostBrowser {
// this is a file // this is a file
prop.put("files_list_" + c + "_type", 0); prop.put("files_list_" + c + "_type", 0);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey()); prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
StoreType type = (StoreType) entry.getValue(); StoreType type = (StoreType) entry.getValue();
try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;} try {uri = new DigestURL(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors
@ -640,7 +601,6 @@ public class HostBrowser {
if (loadRight) { if (loadRight) {
prop.putHTML("files_list_" + c + "_type_stored_load_url", entry.getKey()); prop.putHTML("files_list_" + c + "_type_stored_load_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_stored_load_path", path); prop.putHTML("files_list_" + c + "_type_stored_load_path", path);
prop.putHTML("files_list_" + c + "_type_stored_load_admin", Boolean.toString(admin));
} }
if (++c >= maxcount) break; if (++c >= maxcount) break;
} }
@ -669,7 +629,6 @@ public class HostBrowser {
Iterator<String> i = score.keys(false); Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) { while (i.hasNext() && c < maxcount) {
host = i.next(); host = i.next();
prop.put("inbound_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host)); prop.putHTML("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host));
prop.put("inbound_list_" + c + "_count", score.get(host)); prop.put("inbound_list_" + c + "_count", score.get(host));
c++; c++;
@ -679,7 +638,7 @@ public class HostBrowser {
} else { } else {
prop.put("inbound", 0); prop.put("inbound", 0);
} }
// generate outbound-links table // generate outbound-links table
if (outboundHosts.size() > 0) { if (outboundHosts.size() > 0) {
maxcount = 200; maxcount = 200;
@ -692,7 +651,6 @@ public class HostBrowser {
prop.putHTML("outbound_list_" + c + "_host", host); prop.putHTML("outbound_list_" + c + "_host", host);
prop.put("outbound_list_" + c + "_count", score.get(host)); prop.put("outbound_list_" + c + "_count", score.get(host));
prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey()); prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey());
prop.put("outbound_list_" + c + "_admin", admin ? "true" : "false");
c++; c++;
} }
prop.put("outbound_list", c); prop.put("outbound_list", c);
@ -700,7 +658,7 @@ public class HostBrowser {
} else { } else {
prop.put("outbound", 0); prop.put("outbound", 0);
} }
} catch (final Throwable e) { } catch (final Throwable e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }

@ -1,5 +1,5 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<hostbrowser> <indexbrowser>
#(hosts)#:: #(hosts)#::
<hosts> <hosts>
#{list}# #{list}#
@ -29,4 +29,4 @@
#{/list}# #{/list}#
</inbound> </inbound>
#(/inbound)# #(/inbound)#
</hostbrowser> </indexbrowser>

@ -84,7 +84,7 @@ function updatepage(str) {
<dd> <dd>
<input type="text" size="60" name="url" id="url" value="#[url]#" /> <input type="text" size="60" name="url" id="url" value="#[url]#" />
<input type="submit" name="show" class="btn btn-primary" value="Show Metadata" /> <input type="submit" name="show" class="btn btn-primary" value="Show Metadata" />
#(moar)#::<input type="button" value="Browse Host" class="btn btn-default" onClick="location.href='HostBrowser.html?path=' + document.getElementById('url').value" />#(/moar)# #(moar)#::<input type="button" value="Browse Host" class="btn btn-default" onClick="location.href='IndexBrowser_p.html?path=' + document.getElementById('url').value" />#(/moar)#
<div id="searchresults"></div> <div id="searchresults"></div>
</dd> </dd>
#(moar)#:: #(moar)#::

@ -191,7 +191,7 @@
<li><h3>Monitoring</h3></li> <li><h3>Monitoring</h3></li>
<li><a href="Status.html?noforward=" class="MenuItemLink">System Status</a></li> <li><a href="Status.html?noforward=" class="MenuItemLink">System Status</a></li>
#(navigation-p2p)#::<li><a href="Network.html" accesskey="w" class="MenuItemLink">Peer-to-Peer Network</a></li>#(/navigation-p2p)# #(navigation-p2p)#::<li><a href="Network.html" accesskey="w" class="MenuItemLink">Peer-to-Peer Network</a></li>#(/navigation-p2p)#
#(navigation-crawlmonitor)#::<li><a href="HostBrowser.html?admin=true&hosts=" class="MenuItemLink">Index Browser</a></li>#(/navigation-crawlmonitor)# #(navigation-crawlmonitor)#::<li><a href="IndexBrowser_p.html?admin=true&hosts=" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Index Browser</a></li>#(/navigation-crawlmonitor)#
<li><a href="AccessGrid_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Network Access</a></li> <li><a href="AccessGrid_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Network Access</a></li>
#(navigation-crawlmonitor)#::<li><a href="Crawler_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Crawler Monitor</a></li>#(/navigation-crawlmonitor)# #(navigation-crawlmonitor)#::<li><a href="Crawler_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Crawler Monitor</a></li>#(/navigation-crawlmonitor)#
<!-- <li><a href="terminal_p.html" accesskey="t" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Terminal</a></li> --> <!-- <li><a href="terminal_p.html" accesskey="t" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Terminal</a></li> -->

@ -40,7 +40,6 @@
<li id="header_websearch"><a href="index.html#(authSearch)#::?auth#(/authSearch)#" onclick="this.href='index.html?#(authSearch)#::auth&#(/authSearch)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li> <li id="header_websearch"><a href="index.html#(authSearch)#::?auth#(/authSearch)#" onclick="this.href='index.html?#(authSearch)#::auth&#(/authSearch)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li>
<li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li> <li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li>
<li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li> <li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li>
<li id="header_hostbrowser"><a href="HostBrowser.html?hosts=">Index Browser</a></li>
<li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li> <li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li>
<!--<li><a href="yacysearch_location.html">Location Search</a></li>--> <!--<li><a href="yacysearch_location.html">Location Search</a></li>-->
<li class="divider" role="separator"></li> <li class="divider" role="separator"></li>

@ -20,7 +20,6 @@
<li id="header_websearch"><a href="index.html#(authorized)#::?auth#(/authorized)#" onclick="this.href='index.html?#(authorized)#::auth&#(/authorized)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li> <li id="header_websearch"><a href="index.html#(authorized)#::?auth#(/authorized)#" onclick="this.href='index.html?#(authorized)#::auth&#(/authorized)#former='+encodeURIComponent(document.searchform.search.value)">Web Search</a></li>
<li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li> <li id="header_filesearch"><a href="yacyinteractive.html" onclick="this.href='yacyinteractive.html?handover='+document.searchform.search.value">File Search</a></li>
<li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li> <li id="header_comparesearch"><a href="compare_yacy.html?display=0">Compare Search</a></li>
<li id="header_hostbrowser"><a href="HostBrowser.html?hosts=">Index Browser</a></li>
<li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li> <li id="header_urlviewer"><a href="ViewFile.html">URL Viewer</a></li>
<!--<li><a href="yacysearch_location.html">Location Search</a></li>--> <!--<li><a href="yacysearch_location.html">Location Search</a></li>-->
<li class="divider" role="separator"></li> <li class="divider" role="separator"></li>

@ -1,7 +1,7 @@
<div class="SubMenu"> <div class="SubMenu">
<h3>Web Visualization</h3> <h3>Web Visualization</h3>
<ul class="SubMenu"> <ul class="SubMenu">
<li><a href="HostBrowser.html?admin=true" class="MenuItemLink">Index Browser</a></li> <li><a href="IndexBrowser_p.html?admin=true" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Index Browser</a></li>
<li><a href="WatchWebStructure_p.html?host=auto&amp;depth=3&amp;time=1000" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Web Structure</a></li> <li><a href="WatchWebStructure_p.html?host=auto&amp;depth=3&amp;time=1000" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Web Structure</a></li>
<li><a href="Collage.html" class="MenuItemLink">Image Collage</a></li> <li><a href="Collage.html" class="MenuItemLink">Image Collage</a></li>
</ul> </ul>

@ -4,7 +4,7 @@ User-agent: *
Disallow: /*query= Disallow: /*query=
Disallow: /proxy.html Disallow: /proxy.html
Disallow: /HostBrowser.html Disallow: /IndexBrowser_p.html
#(all)# #(all)#

@ -47,7 +47,7 @@
#(showPictures)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="yacysearch.html?contentdom=image#(authSearch)#::&auth#(/authSearch)#&url=#[link]#&query=#[former]#+inurl:#[link]#" target="_blank">Pictures</a>#(/showPictures)# #(showPictures)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="yacysearch.html?contentdom=image#(authSearch)#::&auth#(/authSearch)#&url=#[link]#&query=#[former]#+inurl:#[link]#" target="_blank">Pictures</a>#(/showPictures)#
#(showCache)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="CacheResource_p.html?url=#[link]#" target="_blank">Cache</a>#(/showCache)# #(showCache)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="CacheResource_p.html?url=#[link]#" target="_blank">Cache</a>#(/showCache)#
#(showProxy)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="proxy.html?url=#[link]#" target="_blank">View via proxy</a>#(/showProxy)# #(showProxy)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="proxy.html?url=#[link]#" target="_blank">View via proxy</a>#(/showProxy)#
#(showHostBrowser)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="HostBrowser.html?path=#[link]#"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a>#(/showHostBrowser)# #(showIndexBrowser)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="IndexBrowser_p.html?path=#[link]#"><img src="env/grafics/minitree.png" width="15" height="8" title="Browse index" alt="Browse index"/></a>#(/showIndexBrowser)#
#(showVocabulary)#::<br/>#{vocabulary}##[name]#:#[terms]# #{/vocabulary}##(/showVocabulary)# #(showVocabulary)#::<br/>#{vocabulary}##[name]#:#[terms]# #{/vocabulary}##(/showVocabulary)#
#(showSnapshots)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="#[link]#" target="_blank">#[extension]# Snapshot</a>#(/showSnapshots)# #(showSnapshots)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><a href="#[link]#" target="_blank">#[extension]# Snapshot</a>#(/showSnapshots)#
#(showRanking)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: #[ranking]#</span>#(/showRanking)# #(showRanking)#::<span role="separator" aria-orientation="vertical">&nbsp;|&nbsp;</span><span title="Raw ranking score value">Ranking: #[ranking]#</span>#(/showRanking)#

@ -276,7 +276,7 @@ public class yacysearchitem {
prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0); prop.put("content_showPictures", sb.getConfigBool("search.result.show.pictures", true) ? 1 : 0);
prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) && Cache.has(resultURL.hash()) ? 1 : 0); prop.put("content_showCache", sb.getConfigBool("search.result.show.cache", true) && Cache.has(resultURL.hash()) ? 1 : 0);
prop.put("content_showProxy", sb.getConfigBool("search.result.show.proxy", true) && sb.getConfigBool("proxyURL", false) ? 1 : 0); prop.put("content_showProxy", sb.getConfigBool("search.result.show.proxy", true) && sb.getConfigBool("proxyURL", false) ? 1 : 0);
prop.put("content_showHostBrowser", sb.getConfigBool("search.result.show.hostbrowser", true) ? 1 : 0); prop.put("content_showIndexBrowser", sb.getConfigBool("search.result.show.indexbrowser", true) ? 1 : 0);
prop.put("content_showSnapshots", snapshotPaths != null && snapshotPaths.size() > 0 && sb.getConfigBool("search.result.show.snapshots", true) ? 1 : 0); prop.put("content_showSnapshots", snapshotPaths != null && snapshotPaths.size() > 0 && sb.getConfigBool("search.result.show.snapshots", true) ? 1 : 0);
prop.put("content_showVocabulary", sb.getConfigBool("search.result.show.vocabulary", true) ? 1 : 0); prop.put("content_showVocabulary", sb.getConfigBool("search.result.show.vocabulary", true) ? 1 : 0);
prop.put("content_showRanking", sb.getConfigBool("search.result.show.ranking", false) ? 1 : 0); prop.put("content_showRanking", sb.getConfigBool("search.result.show.ranking", false) ? 1 : 0);
@ -331,7 +331,7 @@ public class yacysearchitem {
prop.putUrlEncodedHTML("content_showPictures_former", origQ); prop.putUrlEncodedHTML("content_showPictures_former", origQ);
prop.put("content_showCache_link", resultUrlstring); prop.put("content_showCache_link", resultUrlstring);
prop.put("content_showProxy_link", resultUrlstring); prop.put("content_showProxy_link", resultUrlstring);
prop.put("content_showHostBrowser_link", resultUrlstring); prop.put("content_showIndexBrowser_link", resultUrlstring);
if (sb.getConfigBool("search.result.show.vocabulary", true)) { if (sb.getConfigBool("search.result.show.vocabulary", true)) {
int c = 0; int c = 0;
for (String key: result.getFieldNames()) { for (String key: result.getFieldNames()) {

@ -1427,7 +1427,7 @@ More Tutorials==Mehr Tutorials
Please see the tutorials on==Bitte besuchen Sie auch die Anleitungen auf Please see the tutorials on==Bitte besuchen Sie auch die Anleitungen auf
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
#Index Browser==Index Browser #Index Browser==Index Browser
Browse the index of #[ucount]# documents.==Durchsuchen Sie den Index von #[ucount]# Dokumenten. Browse the index of #[ucount]# documents.==Durchsuchen Sie den Index von #[ucount]# Dokumenten.
@ -1468,10 +1468,6 @@ Inbound Links, incoming to #[host]# - Host List==Eingehende Links, eingehend auf
##[count]# URLs==#[count]# URL(s) ##[count]# URLs==#[count]# URL(s)
#Administration Options==Administration Optionen #Administration Options==Administration Optionen
<html lang="en">==<html lang="de"> <html lang="en">==<html lang="de">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Administrator Optionen Administration Options==Administrator Optionen
Delete all==Lösche alle Delete all==Lösche alle
>Load Errors<==>Ladefehler< >Load Errors<==>Ladefehler<

@ -668,7 +668,7 @@ Download from Vimeo==Descargar desde vimeo
More Tutorials==Más tutoriales More Tutorials==Más tutoriales
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
>all hosts<==>todos los hosts< >all hosts<==>todos los hosts<
> or <==> o < > or <==> o <
@ -679,10 +679,6 @@ Documents without Errors==Documentos sin errores
>Path<==>Ruta< >Path<==>Ruta<
Administration Options==Opciones de Administración Administration Options==Opciones de Administración
<html lang="en">==<html lang="es"> <html lang="en">==<html lang="es">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Opciones de Administración Administration Options==Opciones de Administración
Delete all==Eliminar todo Delete all==Eliminar todo
>Load Errors<==>Errores de carga< >Load Errors<==>Errores de carga<

@ -1000,7 +1000,7 @@ This can be done using the <a href="CrawlStartExpert.html">Expert Crawl Start</a
The scheduler on crawls can be changed or removed using the <a href="Table_API_p.html">API Steering==Le planificateur de balayage peut &ecirc;tre modifi&eacute; ou supprim&eacute; au moyen de la <a href="Table_API_p.html">commande de l'API The scheduler on crawls can be changed or removed using the <a href="Table_API_p.html">API Steering==Le planificateur de balayage peut &ecirc;tre modifi&eacute; ou supprim&eacute; au moyen de la <a href="Table_API_p.html">commande de l'API
#--------------------------- #---------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
Browse the index of #[ucount]# documents.==Naviguer dans les #[ucount]# documents de l'index. Browse the index of #[ucount]# documents.==Naviguer dans les #[ucount]# documents de l'index.
Enter a host or an URL for a file list or view a list of==Saisir un nom de domaine ou une URL pour obtenir une liste de fichiers, ou visualiser la liste de Enter a host or an URL for a file list or view a list of==Saisir un nom de domaine ou une URL pour obtenir une liste de fichiers, ou visualiser la liste de
@ -1041,10 +1041,6 @@ Inbound Links, incoming to #[host]# - Host List==Liens entrants, vers #[host]# -
'number of documents about this date'=='Nombre de documents liés à cette date' 'number of documents about this date'=='Nombre de documents liés à cette date'
"show link structure graph"=="Afficher le graphique d'arborescence de liens" "show link structure graph"=="Afficher le graphique d'arborescence de liens"
Host has load error(s)==Erreur(s) de chargement sur ce domaine Host has load error(s)==Erreur(s) de chargement sur ce domaine
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Options d'administration Administration Options==Options d'administration
Delete all==Supprimer toutes les Delete all==Supprimer toutes les
>Load Errors<==>erreurs de chargement< >Load Errors<==>erreurs de chargement<

@ -647,7 +647,7 @@ Download from Vimeo==Scarica da Vimeo
More Tutorials==Altri tutorial More Tutorials==Altri tutorial
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
>all hosts<==>tutti gli host< >all hosts<==>tutti gli host<
> or <==> o < > or <==> o <
@ -658,10 +658,6 @@ Documents without Errors==Documenti privi di errori
>Path<==>Percorso< >Path<==>Percorso<
Administration Options==Opzioni amministrazione Administration Options==Opzioni amministrazione
<html lang="en">==<html lang="it"> <html lang="en">==<html lang="it">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Opzioni di amministrazione Administration Options==Opzioni di amministrazione
Delete all==Elimina tutti Delete all==Elimina tutti
>Load Errors<==>Errori di caricamento< >Load Errors<==>Errori di caricamento<

@ -829,14 +829,10 @@ YaCy: Tutorial==YaCy: チュートリアル
>Tutorial==>チュートリアル >Tutorial==>チュートリアル
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
Index Browser==索引ブラウザー Index Browser==索引ブラウザー
#Administration Options==管理オプション #Administration Options==管理オプション
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==管理オプション Administration Options==管理オプション
Delete all==全部を削除する Delete all==全部を削除する
>Load Errors<==>読み込みエラー< >Load Errors<==>読み込みエラー<

@ -3987,7 +3987,7 @@
</body> </body>
</file> </file>
<file original="HostBrowser.html" source-language="en" datatype="html"> <file original="IndexBrowser_p.html" source-language="en" datatype="html">
<body> <body>
<trans-unit id="c472c59a" xml:space="preserve" approved="no" translate="yes"> <trans-unit id="c472c59a" xml:space="preserve" approved="no" translate="yes">
<source>Index Browser</source> <source>Index Browser</source>
@ -4106,11 +4106,6 @@
<trans-unit id="9c0d6fd2" xml:space="preserve" approved="no" translate="yes"> <trans-unit id="9c0d6fd2" xml:space="preserve" approved="no" translate="yes">
<source>Host has load error(s)</source> <source>Host has load error(s)</source>
</trans-unit> </trans-unit>
</body>
</file>
<file original="HostBrowserAdmin_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="dfa6a8dc" xml:space="preserve" approved="no" translate="yes"> <trans-unit id="dfa6a8dc" xml:space="preserve" approved="no" translate="yes">
<source>Administration Options</source> <source>Administration Options</source>
</trans-unit> </trans-unit>

@ -1546,7 +1546,7 @@ More Tutorials==Больше инструкций
Please see the tutorials on==Пожалуйста, смотрите больше инструкций на Please see the tutorials on==Пожалуйста, смотрите больше инструкций на
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
Index Browser==Просмотр хостов Index Browser==Просмотр хостов
Browse the index of #[ucount]# documents.==В индексе находится #[ucount]# документов. Browse the index of #[ucount]# documents.==В индексе находится #[ucount]# документов.
@ -1590,10 +1590,6 @@ Inbound Links, incoming to #[host]# - Host List==Внутренние ссылк
#browse #[host]#==просмотр #[host]# #browse #[host]#==просмотр #[host]#
##[count]# URLs==#[count]# ссылок ##[count]# URLs==#[count]# ссылок
<html lang="en">==<html lang="ru"> <html lang="en">==<html lang="ru">
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==Расширенные опции Administration Options==Расширенные опции
Delete all==Удалить все Delete all==Удалить все
>Load Errors<==>ошибки загрузки< >Load Errors<==>ошибки загрузки<

@ -1645,7 +1645,7 @@ You can create your own search index with YaCy==您可以用YaCy创建属于自
To learn how to do that, watch one of the demonstration videos below==观看以下demo视频以了解更多 To learn how to do that, watch one of the demonstration videos below==观看以下demo视频以了解更多
#----------------------------- #-----------------------------
#File: HostBrowser.html #File: IndexBrowser_p.html
#--------------------------- #---------------------------
Index Browser==索引浏览器 Index Browser==索引浏览器
Browse the index of #[ucount]# documents.== 浏览来自 #[ucount]# 篇文档的索引. Browse the index of #[ucount]# documents.== 浏览来自 #[ucount]# 篇文档的索引.
@ -1685,10 +1685,6 @@ Inbound Links, incoming to #[host]# - Host List==入站链接,传入#[host]# -
'number of documents about this date'=='在这个日期的文件数量' 'number of documents about this date'=='在这个日期的文件数量'
"show link structure graph"=="展示连接结构图" "show link structure graph"=="展示连接结构图"
Host has load error(s)==主机有加载错误项 Host has load error(s)==主机有加载错误项
#-----------------------------
#File: HostBrowserAdmin_p.html
#---------------------------
Administration Options==管理选项 Administration Options==管理选项
Delete all==全部删除 Delete all==全部删除
>Load Errors<==>加载错误< >Load Errors<==>加载错误<

@ -49,7 +49,7 @@ public enum CollectionSchema implements SchemaDeclaration {
httpstatus_i(SolrType.num_integer, true, true, false, false, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded", true), httpstatus_i(SolrType.num_integer, true, true, false, false, false, "html status return code (i.e. \"200\" for ok), -1 if not loaded", true),
url_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension", true), url_file_ext_s(SolrType.string, true, true, false, false, true, "the file name extension", true),
host_organization_s(SolrType.string, true, true, false, false, true, "either the second level domain or, if a ccSLD is used, the third level domain", true), // needed to search in the url host_organization_s(SolrType.string, true, true, false, false, true, "either the second level domain or, if a ccSLD is used, the third level domain", true), // needed to search in the url
inboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "internal links, the url only without the protocol", true), // needed for HostBrowser inboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "internal links, the url only without the protocol", true), // needed for IndexBrowser
inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol", true), // for correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed inboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "internal links, only the protocol", true), // for correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed
outboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "external links, only the protocol", true), // for correct assembly of outboundlinks outboundlinks_protocol_sxt + outboundlinks_urlstub_sxt is needed outboundlinks_protocol_sxt(SolrType.string, true, true, true, false, false, "external links, only the protocol", true), // for correct assembly of outboundlinks outboundlinks_protocol_sxt + outboundlinks_urlstub_sxt is needed
outboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "external links, the url only without the protocol", true), // needed to enhance the crawler outboundlinks_urlstub_sxt(SolrType.string, true, true, true, false, true, "external links, the url only without the protocol", true), // needed to enhance the crawler

Loading…
Cancel
Save