From f03e16d3dfce9bf3d8d5b3535954f1c0327aa553 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Thu, 16 Jan 2020 20:59:02 +0100
Subject: [PATCH] enhanced crawl start url check experience

urls are now urlencoded and a check is also performed in case that an
url is copied into the url field using copy-paste
---
 htroot/CrawlStartExpert.html | 2 +-
 htroot/CrawlStartSite.html   | 2 +-
 htroot/js/IndexCreate.js     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html
index 7c644ac11..33faf1c87 100644
--- a/htroot/CrawlStartExpert.html
+++ b/htroot/CrawlStartExpert.html
@@ -245,7 +245,7 @@
                     Other already visited URLs are sorted out as "double", if they are not allowed using the re-crawl option.
-
+
diff --git a/htroot/CrawlStartSite.html b/htroot/CrawlStartSite.html
index c834a4381..7532dc29c 100644
--- a/htroot/CrawlStartSite.html
+++ b/htroot/CrawlStartSite.html
@@ -38,7 +38,7 @@
             Start URL (must start with
             http:// https:// ftp:// smb:// file://)
-
+
diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js
index f2b7dae65..753365ba9 100644
--- a/htroot/js/IndexCreate.js
+++ b/htroot/js/IndexCreate.js
@@ -135,6 +135,6 @@ function loadInfos(loadAll) {
 	var url=document.getElementById("crawlingURL").value;
 	if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").checked = true; // since the pdf parser update for page separation, we need to set this
-	sndReq('api/getpageinfo_p.xml?actions=title,robots' + (loadAll ? '' : '&maxLinks=50') + '&url='+url);
+	sndReq('api/getpageinfo_p.xml?actions=title,robots' + (loadAll ? '' : '&maxLinks=50') + '&url=' + encodeURIComponent(url));
 	document.getElementById("api").innerHTML = "APISee the page info about the start url.";
 }
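
A minimal sketch of what the encodeURIComponent() change fixes; the example
start url below is made up, not taken from the patch. Without encoding, any
'&' or '#' inside a pasted url leaks into the surrounding query string of the
getpageinfo_p.xml request:

    // hypothetical pasted start url containing its own query and fragment
    var url = "http://example.com/page?a=1&b=2#top";

    // before the patch: '&b=2' is parsed as a separate parameter of
    // getpageinfo_p.xml, and '#top' truncates the request entirely
    console.log('api/getpageinfo_p.xml?actions=title,robots&url=' + url);

    // after the patch: the whole start url survives as the value of 'url'
    console.log('api/getpageinfo_p.xml?actions=title,robots&url=' + encodeURIComponent(url));
    // -> api/getpageinfo_p.xml?actions=title,robots&url=http%3A%2F%2Fexample.com%2Fpage%3Fa%3D1%26b%3D2%23top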
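
The commit message also announces a check when an url is copied into the url
field. As a hedged sketch of such a paste hook: only the crawlingURL element
id is taken from the patch; the changed() validator name and this wiring are
assumptions, not the verbatim content of the html hunks above:

    // assumed wiring: re-run the page's url validator after a paste; the
    // zero timeout lets the pasted text land in the field before it is read
    document.getElementById("crawlingURL").addEventListener("paste", function () {
        window.setTimeout(changed, 0);
    });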