From f03e16d3dfce9bf3d8d5b3535954f1c0327aa553 Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Thu, 16 Jan 2020 20:59:02 +0100
Subject: [PATCH] enhanced crawl start url check experience

urls are now urlencoded and a check is also performed in case that an
url is copied into the url field using copy-paste
---
 htroot/CrawlStartExpert.html | 2 +-
 htroot/CrawlStartSite.html   | 2 +-
 htroot/js/IndexCreate.js     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html
index 7c644ac11..33faf1c87 100644
--- a/htroot/CrawlStartExpert.html
+++ b/htroot/CrawlStartExpert.html
@@ -245,7 +245,7 @@
                     Other already visited URLs are sorted out as "double", if they are not allowed using the re-crawl option.
-
+
diff --git a/htroot/CrawlStartSite.html b/htroot/CrawlStartSite.html
index c834a4381..7532dc29c 100644
--- a/htroot/CrawlStartSite.html
+++ b/htroot/CrawlStartSite.html
@@ -38,7 +38,7 @@
             Start URL (must start with
             http:// https:// ftp:// smb:// file://)
-
+
diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js
index f2b7dae65..753365ba9 100644
--- a/htroot/js/IndexCreate.js
+++ b/htroot/js/IndexCreate.js
@@ -135,6 +135,6 @@ function loadInfos(loadAll) {
 	var url=document.getElementById("crawlingURL").value;
 	if (url.indexOf("ftp") == 0 || url.indexOf("smb") == 0) document.getElementById("crawlingQ").checked = true; // since the pdf parser update for page separation, we need to set this
-	sndReq('api/getpageinfo_p.xml?actions=title,robots' + (loadAll ? '' : '&maxLinks=50') + '&url='+url);
+	sndReq('api/getpageinfo_p.xml?actions=title,robots' + (loadAll ? '' : '&maxLinks=50') + '&url=' + encodeURIComponent(url));
 	document.getElementById("api").innerHTML = "APISee the page info about the start url.";
 }
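
A minimal sketch of what the encodeURIComponent() change fixes; the example
start url below is made up, not taken from the patch. Without encoding, any
'&' or '#' inside a pasted url leaks into the surrounding query string of the
getpageinfo_p.xml request:

    // hypothetical pasted start url containing its own query and fragment
    var url = "http://example.com/page?a=1&b=2#top";

    // before the patch: '&b=2' is parsed as a separate parameter of
    // getpageinfo_p.xml, and '#top' truncates the request entirely
    console.log('api/getpageinfo_p.xml?actions=title,robots&url=' + url);

    // after the patch: the whole start url survives as the value of 'url'
    console.log('api/getpageinfo_p.xml?actions=title,robots&url=' + encodeURIComponent(url));
    // -> api/getpageinfo_p.xml?actions=title,robots&url=http%3A%2F%2Fexample.com%2Fpage%3Fa%3D1%26b%3D2%23top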
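
The commit message also announces a check when an url is copied into the url
field. As a hedged sketch of such a paste hook: only the crawlingURL element
id is taken from the patch; the changed() validator name and this wiring are
assumptions, not the verbatim content of the html hunks above:

    // assumed wiring: re-run the page's url validator after a paste; the
    // zero timeout lets the pasted text land in the field before it is read
    document.getElementById("crawlingURL").addEventListener("paste", function () {
        window.setTimeout(changed, 0);
    });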