diff --git a/htroot/CrawlStartSite.html b/htroot/CrawlStartSite.html index 257d80da5..dddbc4ff2 100644 --- a/htroot/CrawlStartSite.html +++ b/htroot/CrawlStartSite.html @@ -20,6 +20,7 @@ #%env/templates/header.template%# #%env/templates/submenuCrawler.template%# +

Site Crawling

diff --git a/htroot/api/getpageinfo.java b/htroot/api/getpageinfo.java index c511c5e75..3c1ec6ac5 100644 --- a/htroot/api/getpageinfo.java +++ b/htroot/api/getpageinfo.java @@ -65,7 +65,6 @@ public class getpageinfo { prop.put("lang", ""); prop.put("robots-allowed", "3"); //unknown prop.put("robotsInfo", ""); //unknown - prop.put("sitemap", ""); prop.put("favicon",""); prop.put("sitelist", ""); prop.put("filter", ".*"); diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js index e3c840fab..bdb367d96 100644 --- a/htroot/js/IndexCreate.js +++ b/htroot/js/IndexCreate.js @@ -45,12 +45,15 @@ function handleResponse(){ // get the sitemap URL contained in the robots.txt if (document.getElementsByName("sitemapURL").length > 0) { - sitemap=""; - if (response.getElementsByTagName("sitemap")[0].firstChild!=null){ - sitemap=response.getElementsByTagName("sitemap")[0].firstChild.nodeValue; - } - document.getElementsByName("sitemapURL")[0].value=sitemap; - if (sitemap) document.getElementById("sitemap").disabled=false; + sitemap=""; + // there can be zero, one or many sitemaps + sitemapElement = response.getElementsByTagName("sitemap"); + if (sitemapElement != null && sitemapElement.length > 0 && sitemapElement[0].firstChild != null) { + // if there are several, we take only the first + sitemap = sitemapElement[0].firstChild.nodeValue; + } + document.getElementsByName("sitemapURL")[0].value = sitemap; + if (sitemap) document.getElementById("sitemap").disabled = false; } sitelist=""; if (response.getElementsByTagName("sitelist")[0].firstChild!=null){ @@ -77,5 +80,4 @@ function loadInfos() { if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").disabled=true; else document.getElementById("crawlingQ").disabled=false; sndReq('/api/getpageinfo_p.xml?actions=title,robots&url='+url); document.getElementById("api").innerHTML = "APISee the page info about the start url."; - }