## either the second level domain or, if a ccSLD is used, the third level domain. Needed to search in the url
host_organization_s
## internal links, only the protocol. Needed for HostBrowser
## internal links, only the protocol. Needed for IndexBrowser
inboundlinks_protocol_sxt
## internal links, the url only without the protocol. For correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed
<div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px" type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
for (var i = 0; i <firstChannel.items.length;i++){
item = firstChannel.items[i];
html += "<trclass=\"TableCellLight\"><tdstyle=\"padding:2px;text-align:left;\"><ahref=\"HostBrowser.html?admin=#[admin]#&pathsearch=&path=" +item.link+"\">" + item.link + "<\/a><\/td>";
html += "<trclass=\"TableCellLight\"><tdstyle=\"padding:2px;text-align:left;\"><ahref=\"IndexBrowser_p.html?pathsearch=&path=" +item.link+"\">" + item.link + "<\/a><\/td>";
}
html += "</tbody><\/table>";
}
@ -60,27 +60,12 @@ function updatepage(str) {
<link href="env/hypertree.css" rel="stylesheet">
</head>
<body id="IndexControl">
#(topmenu)#
<div class="SubMenu">
<ul class="SubMenu">
<li style="width:15%;"><a class="MenuItemLink" href="index.html"><img src="env/grafics/navsl.gif" height="10px" style="padding-right:10px" align="baseline"/>back to start page</a></li>
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="HostBrowser.html?admin=#[admin]#&hosts=">all hosts</a>#(authorized)#::, <a href="HostBrowser.html?admin=#[admin]#&hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="HostBrowser.html?admin=#[admin]#&hosts=error">only with load errors</a>#(/authorized)#.</p>
<p>Browse the index of #[ucount]# documents. Enter a host or an URL for a file list or view a list of <a href="IndexBrowser_p.html?hosts=">all hosts</a>#(authorized)#::, <a href="IndexBrowser_p.html?hosts=crawling">only hosts with urls pending in the crawler</a> or <a href="IndexBrowser_p.html?hosts=error">only with load errors</a>#(/authorized)#.</p>
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8"> <a href="HostBrowser.html?admin=#[admin]#&path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div>
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div><img src="env/grafics/#(type)#invisible.png::burn-e.gif::construction.gif#(/type)#" alt="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" title="#(type)#::Host has load error(s)::Pending in Crawler#(/type)#" style="float:left" width="12" height="8"> <a href="IndexBrowser_p.html?path=#[host]#&facetcount=#[count]#">#[host]#</a></div></div>
<span class="commit"> Documents without Errors</span>
@ -117,7 +102,7 @@ function updatepage(str) {
#(/authorized)#
</div>
</fieldset>
<link rel="stylesheet" href="env/morris.css">
<script src="js/raphael.min.js"></script>
<script src="js/morris.js"></script>
@ -125,100 +110,100 @@ function updatepage(str) {
<div id="graph" style="height:200px"></div>
<script>
var solr= $.getJSON("solr/collection1/select?q=*:*&defType=edismax&start=0&rows=0&wt=json&facet=true&facet.field=dates_in_content_dts&facet.sort=index", function(data) {
#(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="HostBrowser.html?admin=#[admin]#&load=#[url]#&path=#[path]#">load & index</a>#(/load)#</td>::
#(load)#<td style="text-align:left;white-space:nowrap" colspan="5" class="listingem">link, detected from context</td>::<td style="text-align:left;white-space:nowrap" colspan="5" class="listingnok"><a href="IndexBrowser_p.html?load=#[url]#&path=#[path]#">load & index</a>#(/load)#</td>::
<div>Delete all <span class="error">Load Errors</span> from index <input style="width:240px" type="submit" name="deleteLoadErrors" value="Delete Load Errors" class="btn btn-primary"/></div>
#(showProxy)#::<span role="separator" aria-orientation="vertical"> | </span><a href="proxy.html?url=#[link]#" target="_blank">View via proxy</a>#(/showProxy)#
@ -1000,7 +1000,7 @@ This can be done using the <a href="CrawlStartExpert.html">Expert Crawl Start</a
The scheduler on crawls can be changed or removed using the <a href="Table_API_p.html">API Steering==Le planificateur de balayage peut être modifié ou supprimé au moyen de la <a href="Table_API_p.html">commande de l'API
#---------------------------
#File: HostBrowser.html
#File: IndexBrowser_p.html
#---------------------------
Browse the index of #[ucount]# documents.==Naviguer dans les #[ucount]# documents de l'index.
Enter a host or an URL for a file list or view a list of==Saisir un nom de domaine ou une URL pour obtenir une liste de fichiers, ou visualiser la liste de
@ -1041,10 +1041,6 @@ Inbound Links, incoming to #[host]# - Host List==Liens entrants, vers #[host]# -
'number of documents about this date'=='Nombre de documents liés à cette date'
"show link structure graph"=="Afficher le graphique d'arborescence de liens"
Host has load error(s)==Erreur(s) de chargement sur ce domaine
@ -49,7 +49,7 @@ public enum CollectionSchema implements SchemaDeclaration {
httpstatus_i(SolrType.num_integer,true,true,false,false,false,"html status return code (i.e. \"200\" for ok), -1 if not loaded",true),
url_file_ext_s(SolrType.string,true,true,false,false,true,"the file name extension",true),
host_organization_s(SolrType.string,true,true,false,false,true,"either the second level domain or, if a ccSLD is used, the third level domain",true),// needed to search in the url
inboundlinks_urlstub_sxt(SolrType.string,true,true,true,false,true,"internal links, the url only without the protocol",true),// needed for HostBrowser
inboundlinks_urlstub_sxt(SolrType.string,true,true,true,false,true,"internal links, the url only without the protocol",true),// needed for IndexBrowser
inboundlinks_protocol_sxt(SolrType.string,true,true,true,false,false,"internal links, only the protocol",true),// for correct assembly of inboundlinks inboundlinks_protocol_sxt + inboundlinks_urlstub_sxt is needed
outboundlinks_protocol_sxt(SolrType.string,true,true,true,false,false,"external links, only the protocol",true),// for correct assembly of outboundlinks outboundlinks_protocol_sxt + outboundlinks_urlstub_sxt is needed
outboundlinks_urlstub_sxt(SolrType.string,true,true,true,false,true,"external links, the url only without the protocol",true),// needed to enhance the crawler