diff options
author | Brad King <brad.king@kitware.com> | 2023-02-21 15:01:04 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2023-02-21 15:04:36 (GMT) |
commit | ce1550f1783ba7ca981468621da4bc33d065b508 (patch) | |
tree | d66a86859605f70063f161d00fe7f61e5ffb32f5 /Utilities/cmcurl/lib/urlapi.c | |
parent | 37cceabc6531884045c88c2b461f7e9a7054edd2 (diff) | |
parent | 11ba4361aaecf2f1f82ef841146c4c90173d2aca (diff) | |
download | CMake-ce1550f1783ba7ca981468621da4bc33d065b508.zip CMake-ce1550f1783ba7ca981468621da4bc33d065b508.tar.gz CMake-ce1550f1783ba7ca981468621da4bc33d065b508.tar.bz2 |
Merge branch 'upstream-curl' into update-curl
* upstream-curl:
curl 2023-02-20 (046209e5)
Diffstat (limited to 'Utilities/cmcurl/lib/urlapi.c')
-rw-r--r-- | Utilities/cmcurl/lib/urlapi.c | 127 |
1 files changed, 74 insertions, 53 deletions
diff --git a/Utilities/cmcurl/lib/urlapi.c b/Utilities/cmcurl/lib/urlapi.c index b96af35..29927b3 100644 --- a/Utilities/cmcurl/lib/urlapi.c +++ b/Utilities/cmcurl/lib/urlapi.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al. + * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -33,6 +33,7 @@ #include "inet_pton.h" #include "inet_ntop.h" #include "strdup.h" +#include "idn.h" /* The last 3 #include files should be in this order */ #include "curl_printf.h" @@ -116,14 +117,11 @@ static const char *find_host_sep(const char *url) } /* - * Decide in an encoding-independent manner whether a character in a URL must - * be escaped. This is used in urlencode_str(). + * Decide whether a character in a URL must be escaped. */ -static bool urlchar_needs_escaping(int c) -{ - return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); -} +#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c))) +static const char hexdigits[] = "0123456789abcdef"; /* urlencode_str() writes data into an output dynbuf and URL-encodes the * spaces in the source URL accordingly. * @@ -167,7 +165,10 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url, left = FALSE; if(urlchar_needs_escaping(*iptr)) { - if(Curl_dyn_addf(o, "%%%02x", *iptr)) + char out[3]={'%'}; + out[1] = hexdigits[*iptr>>4]; + out[2] = hexdigits[*iptr & 0xf]; + if(Curl_dyn_addn(o, out, 3)) return CURLUE_OUT_OF_MEMORY; } else { @@ -492,35 +493,21 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, bool has_scheme) { - char *portptr = NULL; - char endbracket; - int len; + char *portptr; char *hostname = Curl_dyn_ptr(host); /* * Find the end of an IPv6 address, either on the ']' ending bracket or * a percent-encoded zone index. */ - if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n", - &endbracket, &len)) { - if(']' == endbracket) - portptr = &hostname[len]; - else if('%' == endbracket) { - int zonelen = len; - if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) { - if(']' != endbracket) - return CURLUE_BAD_IPV6; - portptr = &hostname[--zonelen + len + 1]; - } - else - return CURLUE_BAD_IPV6; - } - else + if(hostname[0] == '[') { + portptr = strchr(hostname, ']'); + if(!portptr) return CURLUE_BAD_IPV6; - + portptr++; /* this is a RFC2732-style specified IP-address */ - if(portptr && *portptr) { + if(*portptr) { if(*portptr != ':') - return CURLUE_BAD_IPV6; + return CURLUE_BAD_PORT_NUMBER; } else portptr = NULL; @@ -584,11 +571,9 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, hostname++; hlen -= 2; - if(hostname[hlen] != ']') - return CURLUE_BAD_IPV6; - - /* only valid letters are ok */ + /* only valid IPv6 letters are ok */ len = strspn(hostname, l); + if(hlen != len) { hlen = len; if(hostname[len] == '%') { @@ -602,8 +587,7 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, while(*h && (*h != ']') && (i < 15)) zoneid[i++] = *h++; if(!i || (']' != *h)) - /* impossible to reach? */ - return CURLUE_MALFORMED_INPUT; + return CURLUE_BAD_IPV6; zoneid[i] = 0; u->zoneid = strdup(zoneid); if(!u->zoneid) @@ -782,25 +766,28 @@ static CURLUcode decode_host(struct dynbuf *host) * * RETURNS * - * an allocated dedotdotified output string + * Zero for success and 'out' set to an allocated dedotdotified string. */ -UNITTEST char *dedotdotify(const char *input, size_t clen); -UNITTEST char *dedotdotify(const char *input, size_t clen) +UNITTEST int dedotdotify(const char *input, size_t clen, char **outp); +UNITTEST int dedotdotify(const char *input, size_t clen, char **outp) { - char *out = malloc(clen + 1); char *outptr; const char *orginput = input; char *queryp; + char *out; + + *outp = NULL; + /* the path always starts with a slash, and a slash has not dot */ + if((clen < 2) || !memchr(input, '.', clen)) + return 0; + + out = malloc(clen + 1); if(!out) - return NULL; /* out of memory */ + return 1; /* out of memory */ *out = 0; /* null-terminates, for inputs like "./" */ outptr = out; - if(!*input) - /* zero length input string, return that */ - return out; - /* * To handle query-parts properly, we must find it and remove it during the * dotdot-operation and then append it again at the end to the output @@ -905,7 +892,8 @@ UNITTEST char *dedotdotify(const char *input, size_t clen) memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */ } - return out; + *outp = out; + return 0; /* success */ } static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) @@ -1152,7 +1140,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) size_t qlen = strlen(query) - fraglen; /* includes '?' */ pathlen = strlen(path) - qlen - fraglen; if(qlen > 1) { - if(qlen && (flags & CURLU_URLENCODE)) { + if(flags & CURLU_URLENCODE) { struct dynbuf enc; Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); /* skip the leading question mark */ @@ -1199,8 +1187,8 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) path = u->path = Curl_dyn_ptr(&enc); } - if(!pathlen) { - /* there is no path left, unset */ + if(pathlen <= 1) { + /* there is no path left or just the slash, unset */ path = NULL; } else { @@ -1224,13 +1212,16 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) if(!(flags & CURLU_PATH_AS_IS)) { /* remove ../ and ./ sequences according to RFC3986 */ - char *newp = dedotdotify((char *)path, pathlen); - if(!newp) { + char *dedot; + int err = dedotdotify((char *)path, pathlen, &dedot); + if(err) { result = CURLUE_OUT_OF_MEMORY; goto fail; } - free(u->path); - u->path = newp; + if(dedot) { + free(u->path); + u->path = dedot; + } } } @@ -1379,6 +1370,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, char portbuf[7]; bool urldecode = (flags & CURLU_URLDECODE)?1:0; bool urlencode = (flags & CURLU_URLENCODE)?1:0; + bool punycode = FALSE; bool plusdecode = FALSE; (void)flags; if(!u) @@ -1408,6 +1400,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, case CURLUPART_HOST: ptr = u->host; ifmissing = CURLUE_NO_HOST; + punycode = (flags & CURLU_PUNYCODE)?1:0; break; case CURLUPART_ZONEID: ptr = u->zoneid; @@ -1460,6 +1453,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, char *options = u->options; char *port = u->port; char *allochost = NULL; + punycode = (flags & CURLU_PUNYCODE)?1:0; if(u->scheme && strcasecompare("file", u->scheme)) { url = aprintf("file://%s%s%s", u->path, @@ -1514,6 +1508,17 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, if(!allochost) return CURLUE_OUT_OF_MEMORY; } + else if(punycode) { + if(!Curl_is_ASCII_name(u->host)) { +#ifndef USE_IDN + return CURLUE_LACKS_IDN; +#else + allochost = Curl_idn_decode(u->host); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; +#endif + } + } else { /* only encode '%' in output host name */ char *host = u->host; @@ -1611,6 +1616,19 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, free(*part); *part = Curl_dyn_ptr(&enc); } + else if(punycode) { + if(!Curl_is_ASCII_name(u->host)) { +#ifndef USE_IDN + return CURLUE_LACKS_IDN; +#else + char *allochost = Curl_idn_decode(*part); + if(!allochost) + return CURLUE_OUT_OF_MEMORY; + free(*part); + *part = allochost; +#endif + } + } return CURLUE_OK; } @@ -1807,7 +1825,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, return CURLUE_OUT_OF_MEMORY; } else { - result = Curl_dyn_addf(&enc, "%%%02x", *i); + char out[3]={'%'}; + out[1] = hexdigits[*i>>4]; + out[2] = hexdigits[*i & 0xf]; + result = Curl_dyn_addn(&enc, out, 3); if(result) return CURLUE_OUT_OF_MEMORY; } |