summaryrefslogtreecommitdiffstats
path: root/Utilities/cmcurl/lib/urlapi.c
diff options
context:
space:
mode:
authorBrad King <brad.king@kitware.com>2023-02-21 15:01:04 (GMT)
committerBrad King <brad.king@kitware.com>2023-02-21 15:04:36 (GMT)
commitce1550f1783ba7ca981468621da4bc33d065b508 (patch)
treed66a86859605f70063f161d00fe7f61e5ffb32f5 /Utilities/cmcurl/lib/urlapi.c
parent37cceabc6531884045c88c2b461f7e9a7054edd2 (diff)
parent11ba4361aaecf2f1f82ef841146c4c90173d2aca (diff)
downloadCMake-ce1550f1783ba7ca981468621da4bc33d065b508.zip
CMake-ce1550f1783ba7ca981468621da4bc33d065b508.tar.gz
CMake-ce1550f1783ba7ca981468621da4bc33d065b508.tar.bz2
Merge branch 'upstream-curl' into update-curl
* upstream-curl: curl 2023-02-20 (046209e5)
Diffstat (limited to 'Utilities/cmcurl/lib/urlapi.c')
-rw-r--r--Utilities/cmcurl/lib/urlapi.c127
1 files changed, 74 insertions, 53 deletions
diff --git a/Utilities/cmcurl/lib/urlapi.c b/Utilities/cmcurl/lib/urlapi.c
index b96af35..29927b3 100644
--- a/Utilities/cmcurl/lib/urlapi.c
+++ b/Utilities/cmcurl/lib/urlapi.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -33,6 +33,7 @@
#include "inet_pton.h"
#include "inet_ntop.h"
#include "strdup.h"
+#include "idn.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -116,14 +117,11 @@ static const char *find_host_sep(const char *url)
}
/*
- * Decide in an encoding-independent manner whether a character in a URL must
- * be escaped. This is used in urlencode_str().
+ * Decide whether a character in a URL must be escaped.
*/
-static bool urlchar_needs_escaping(int c)
-{
- return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
-}
+#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
+static const char hexdigits[] = "0123456789abcdef";
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
* spaces in the source URL accordingly.
*
@@ -167,7 +165,10 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
- if(Curl_dyn_addf(o, "%%%02x", *iptr))
+ char out[3]={'%'};
+ out[1] = hexdigits[*iptr>>4];
+ out[2] = hexdigits[*iptr & 0xf];
+ if(Curl_dyn_addn(o, out, 3))
return CURLUE_OUT_OF_MEMORY;
}
else {
@@ -492,35 +493,21 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
bool has_scheme)
{
- char *portptr = NULL;
- char endbracket;
- int len;
+ char *portptr;
char *hostname = Curl_dyn_ptr(host);
/*
* Find the end of an IPv6 address, either on the ']' ending bracket or
* a percent-encoded zone index.
*/
- if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
- &endbracket, &len)) {
- if(']' == endbracket)
- portptr = &hostname[len];
- else if('%' == endbracket) {
- int zonelen = len;
- if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
- if(']' != endbracket)
- return CURLUE_BAD_IPV6;
- portptr = &hostname[--zonelen + len + 1];
- }
- else
- return CURLUE_BAD_IPV6;
- }
- else
+ if(hostname[0] == '[') {
+ portptr = strchr(hostname, ']');
+ if(!portptr)
return CURLUE_BAD_IPV6;
-
+ portptr++;
/* this is a RFC2732-style specified IP-address */
- if(portptr && *portptr) {
+ if(*portptr) {
if(*portptr != ':')
- return CURLUE_BAD_IPV6;
+ return CURLUE_BAD_PORT_NUMBER;
}
else
portptr = NULL;
@@ -584,11 +571,9 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
hostname++;
hlen -= 2;
- if(hostname[hlen] != ']')
- return CURLUE_BAD_IPV6;
-
- /* only valid letters are ok */
+ /* only valid IPv6 letters are ok */
len = strspn(hostname, l);
+
if(hlen != len) {
hlen = len;
if(hostname[len] == '%') {
@@ -602,8 +587,7 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
while(*h && (*h != ']') && (i < 15))
zoneid[i++] = *h++;
if(!i || (']' != *h))
- /* impossible to reach? */
- return CURLUE_MALFORMED_INPUT;
+ return CURLUE_BAD_IPV6;
zoneid[i] = 0;
u->zoneid = strdup(zoneid);
if(!u->zoneid)
@@ -782,25 +766,28 @@ static CURLUcode decode_host(struct dynbuf *host)
*
* RETURNS
*
- * an allocated dedotdotified output string
+ * Zero for success and 'out' set to an allocated dedotdotified string.
*/
-UNITTEST char *dedotdotify(const char *input, size_t clen);
-UNITTEST char *dedotdotify(const char *input, size_t clen)
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
- char *out = malloc(clen + 1);
char *outptr;
const char *orginput = input;
char *queryp;
+ char *out;
+
+ *outp = NULL;
+ /* the path always starts with a slash, and a slash has not dot */
+ if((clen < 2) || !memchr(input, '.', clen))
+ return 0;
+
+ out = malloc(clen + 1);
if(!out)
- return NULL; /* out of memory */
+ return 1; /* out of memory */
*out = 0; /* null-terminates, for inputs like "./" */
outptr = out;
- if(!*input)
- /* zero length input string, return that */
- return out;
-
/*
* To handle query-parts properly, we must find it and remove it during the
* dotdot-operation and then append it again at the end to the output
@@ -905,7 +892,8 @@ UNITTEST char *dedotdotify(const char *input, size_t clen)
memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
}
- return out;
+ *outp = out;
+ return 0; /* success */
}
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
@@ -1152,7 +1140,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
size_t qlen = strlen(query) - fraglen; /* includes '?' */
pathlen = strlen(path) - qlen - fraglen;
if(qlen > 1) {
- if(qlen && (flags & CURLU_URLENCODE)) {
+ if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* skip the leading question mark */
@@ -1199,8 +1187,8 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
path = u->path = Curl_dyn_ptr(&enc);
}
- if(!pathlen) {
- /* there is no path left, unset */
+ if(pathlen <= 1) {
+ /* there is no path left or just the slash, unset */
path = NULL;
}
else {
@@ -1224,13 +1212,16 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
- char *newp = dedotdotify((char *)path, pathlen);
- if(!newp) {
+ char *dedot;
+ int err = dedotdotify((char *)path, pathlen, &dedot);
+ if(err) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
- free(u->path);
- u->path = newp;
+ if(dedot) {
+ free(u->path);
+ u->path = dedot;
+ }
}
}
@@ -1379,6 +1370,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
char portbuf[7];
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
+ bool punycode = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
@@ -1408,6 +1400,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
case CURLUPART_HOST:
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
@@ -1460,6 +1453,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
@@ -1514,6 +1508,17 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
}
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ allochost = Curl_idn_decode(u->host);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+#endif
+ }
+ }
else {
/* only encode '%' in output host name */
char *host = u->host;
@@ -1611,6 +1616,19 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
free(*part);
*part = Curl_dyn_ptr(&enc);
}
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ char *allochost = Curl_idn_decode(*part);
+ if(!allochost)
+ return CURLUE_OUT_OF_MEMORY;
+ free(*part);
+ *part = allochost;
+#endif
+ }
+ }
return CURLUE_OK;
}
@@ -1807,7 +1825,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
return CURLUE_OUT_OF_MEMORY;
}
else {
- result = Curl_dyn_addf(&enc, "%%%02x", *i);
+ char out[3]={'%'};
+ out[1] = hexdigits[*i>>4];
+ out[2] = hexdigits[*i & 0xf];
+ result = Curl_dyn_addn(&enc, out, 3);
if(result)
return CURLUE_OUT_OF_MEMORY;
}