summaryrefslogtreecommitdiffstats
path: root/lib/urlapi.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/urlapi.c')
-rw-r--r--lib/urlapi.c237
1 files changed, 143 insertions, 94 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c
index d07e4f5..506e244 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -29,6 +29,7 @@
#include "url.h"
#include "escape.h"
#include "curl_ctype.h"
+#include "inet_pton.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -63,6 +64,7 @@ struct Curl_URL {
char *fragment;
char *scratch; /* temporary scratch area */
+ char *temppath; /* temporary path pointer */
long portnum; /* the numerical version */
};
@@ -81,6 +83,7 @@ static void free_urlhandle(struct Curl_URL *u)
free(u->query);
free(u->fragment);
free(u->scratch);
+ free(u->temppath);
}
/* move the full contents of one handle onto another and
@@ -350,7 +353,7 @@ static char *concat_url(const char *base, const char *relurl)
else {
/* We got a new absolute path for this server */
- if((relurl[0] == '/') && (relurl[1] == '/')) {
+ if(relurl[1] == '/') {
/* the new URL starts with //, just keep the protocol part from the
original one */
*protsep = 0;
@@ -425,7 +428,6 @@ static char *concat_url(const char *base, const char *relurl)
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
- const struct Curl_handler *h,
char **hostname,
unsigned int flags)
{
@@ -434,6 +436,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
char *userp = NULL;
char *passwdp = NULL;
char *optionsp = NULL;
+ const struct Curl_handler *h = NULL;
/* At this point, we're hoping all the other special cases have
* been taken care of, so conn->host.name is at most
@@ -453,6 +456,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
* ftp://user:password@ftp.my.site:8021/README */
*hostname = ++ptr;
+ /* if this is a known scheme, get some details */
+ if(u->scheme)
+ h = Curl_builtin_scheme(u->scheme);
+
/* We could use the login information in the URL so extract it. Only parse
options if the handler says we should. Note that 'h' might be NULL! */
ccode = Curl_parse_login_details(login, ptr - login - 1,
@@ -568,7 +575,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
}
/* scan for byte values < 31 or 127 */
-static CURLUcode junkscan(char *part)
+static CURLUcode junkscan(const char *part)
{
if(part) {
static const char badbytes[]={
@@ -591,20 +598,26 @@ static CURLUcode junkscan(char *part)
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
{
- const char *l = NULL; /* accepted characters */
size_t len;
size_t hlen = strlen(hostname);
if(hostname[0] == '[') {
+#ifdef ENABLE_IPV6
+ char dest[16]; /* fits a binary IPv6 address */
+#endif
+ const char *l = "0123456789abcdefABCDEF:.";
+ if(hlen < 5) /* '[::1]' is the shortest possible valid string */
+ return CURLUE_MALFORMED_INPUT;
hostname++;
- l = "0123456789abcdefABCDEF::.";
hlen -= 2;
- }
- if(l) {
+ if(hostname[hlen] != ']')
+ return CURLUE_MALFORMED_INPUT;
+
/* only valid letters are ok */
len = strspn(hostname, l);
if(hlen != len) {
+ hlen = len;
if(hostname[len] == '%') {
/* this could now be '%[zone id]' */
char zoneid[16];
@@ -628,6 +641,12 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
return CURLUE_MALFORMED_INPUT;
/* hostname is fine */
}
+#ifdef ENABLE_IPV6
+ hostname[hlen] = 0; /* end the address there */
+ if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
+ return CURLUE_MALFORMED_INPUT;
+ hostname[hlen] = ']'; /* restore ending bracket */
+#endif
}
else {
/* letters from the second string is not ok */
@@ -653,10 +672,9 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
CURLUcode result;
bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN + 1];
- char *schemep = NULL;
+ const char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
- const struct Curl_handler *h = NULL;
if(!url)
return CURLUE_MALFORMED_INPUT;
@@ -775,6 +793,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(junkscan(schemep))
return CURLUE_MALFORMED_INPUT;
+
}
else {
/* no scheme! */
@@ -782,7 +801,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
return CURLUE_MALFORMED_INPUT;
if(flags & CURLU_DEFAULT_SCHEME)
- schemep = (char *) DEFAULT_SCHEME;
+ schemep = DEFAULT_SCHEME;
/*
* The URL was badly formatted, let's try without scheme specified.
@@ -795,74 +814,83 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
p++;
len = p - hostp;
- if(!len)
- return CURLUE_MALFORMED_INPUT;
-
- memcpy(hostname, hostp, len);
- hostname[len] = 0;
-
- if((flags & CURLU_GUESS_SCHEME) && !schemep) {
- /* legacy curl-style guess based on host name */
- if(checkprefix("ftp.", hostname))
- schemep = (char *)"ftp";
- else if(checkprefix("dict.", hostname))
- schemep = (char *)"dict";
- else if(checkprefix("ldap.", hostname))
- schemep = (char *)"ldap";
- else if(checkprefix("imap.", hostname))
- schemep = (char *)"imap";
- else if(checkprefix("smtp.", hostname))
- schemep = (char *)"smtp";
- else if(checkprefix("pop3.", hostname))
- schemep = (char *)"pop3";
- else
- schemep = (char *)"http";
+ if(len) {
+ memcpy(hostname, hostp, len);
+ hostname[len] = 0;
+ }
+ else {
+ if(!(flags & CURLU_NO_AUTHORITY))
+ return CURLUE_MALFORMED_INPUT;
}
len = strlen(p);
memcpy(path, p, len);
path[len] = 0;
- u->scheme = strdup(schemep);
- if(!u->scheme)
- return CURLUE_OUT_OF_MEMORY;
+ if(schemep) {
+ u->scheme = strdup(schemep);
+ if(!u->scheme)
+ return CURLUE_OUT_OF_MEMORY;
+ }
}
- /* if this is a known scheme, get some details */
- h = Curl_builtin_scheme(u->scheme);
-
if(junkscan(path))
return CURLUE_MALFORMED_INPUT;
- query = strchr(path, '?');
- if(query)
- *query++ = 0;
+ if((flags & CURLU_URLENCODE) && path[0]) {
+ /* worst case output length is 3x the original! */
+ char *newp = malloc(strlen(path) * 3);
+ if(!newp)
+ return CURLUE_OUT_OF_MEMORY;
+ path_alloced = TRUE;
+ strcpy_url(newp, path, TRUE); /* consider it relative */
+ u->temppath = path = newp;
+ }
- fragment = strchr(query?query:path, '#');
- if(fragment)
+ fragment = strchr(path, '#');
+ if(fragment) {
*fragment++ = 0;
+ if(fragment[0]) {
+ u->fragment = strdup(fragment);
+ if(!u->fragment)
+ return CURLUE_OUT_OF_MEMORY;
+ }
+ }
+
+ query = strchr(path, '?');
+ if(query) {
+ *query++ = 0;
+ /* done even if the query part is a blank string */
+ u->query = strdup(query);
+ if(!u->query)
+ return CURLUE_OUT_OF_MEMORY;
+ }
if(!path[0])
- /* if there's no path set, unset */
+ /* if there's no path left set, unset */
path = NULL;
- else if(!(flags & CURLU_PATH_AS_IS)) {
- /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
- char *newp = Curl_dedotdotify(path);
- if(!newp)
- return CURLUE_OUT_OF_MEMORY;
+ else {
+ if(!(flags & CURLU_PATH_AS_IS)) {
+ /* remove ../ and ./ sequences according to RFC3986 */
+ char *newp = Curl_dedotdotify(path);
+ if(!newp)
+ return CURLUE_OUT_OF_MEMORY;
- if(strcmp(newp, path)) {
- /* if we got a new version */
- path = newp;
- path_alloced = TRUE;
+ if(strcmp(newp, path)) {
+ /* if we got a new version */
+ if(path_alloced)
+ Curl_safefree(u->temppath);
+ u->temppath = path = newp;
+ path_alloced = TRUE;
+ }
+ else
+ free(newp);
}
- else
- free(newp);
- }
- if(path) {
+
u->path = path_alloced?path:strdup(path);
if(!u->path)
return CURLUE_OUT_OF_MEMORY;
+ u->temppath = NULL; /* used now */
}
if(hostname) {
@@ -872,7 +900,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(junkscan(hostname))
return CURLUE_MALFORMED_INPUT;
- result = parse_hostname_login(u, h, &hostname, flags);
+ result = parse_hostname_login(u, &hostname, flags);
if(result)
return result;
@@ -880,28 +908,44 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(result)
return result;
- result = hostname_check(u, hostname);
- if(result)
- return result;
+ if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
+ /* Skip hostname check, it's allowed to be empty. */
+ }
+ else {
+ result = hostname_check(u, hostname);
+ if(result)
+ return result;
+ }
u->host = strdup(hostname);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
- }
- if(query) {
- u->query = strdup(query);
- if(!u->query)
- return CURLUE_OUT_OF_MEMORY;
- }
- if(fragment && fragment[0]) {
- u->fragment = strdup(fragment);
- if(!u->fragment)
- return CURLUE_OUT_OF_MEMORY;
+ if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ /* legacy curl-style guess based on host name */
+ if(checkprefix("ftp.", hostname))
+ schemep = "ftp";
+ else if(checkprefix("dict.", hostname))
+ schemep = "dict";
+ else if(checkprefix("ldap.", hostname))
+ schemep = "ldap";
+ else if(checkprefix("imap.", hostname))
+ schemep = "imap";
+ else if(checkprefix("smtp.", hostname))
+ schemep = "smtp";
+ else if(checkprefix("pop3.", hostname))
+ schemep = "pop3";
+ else
+ schemep = "http";
+
+ u->scheme = strdup(schemep);
+ if(!u->scheme)
+ return CURLUE_OUT_OF_MEMORY;
+ }
}
- free(u->scratch);
- u->scratch = NULL;
+ Curl_safefree(u->scratch);
+ Curl_safefree(u->temppath);
return CURLUE_OK;
}
@@ -1066,24 +1110,23 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
else
return CURLUE_NO_SCHEME;
- if(scheme) {
- h = Curl_builtin_scheme(scheme);
- if(!port && (flags & CURLU_DEFAULT_PORT)) {
- /* there's no stored port number, but asked to deliver
- a default one for the scheme */
- if(h) {
- msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
- port = portbuf;
- }
- }
- else if(port) {
- /* there is a stored port number, but asked to inhibit if it matches
- the default one for the scheme */
- if(h && (h->defport == u->portnum) &&
- (flags & CURLU_NO_DEFAULT_PORT))
- port = NULL;
+ h = Curl_builtin_scheme(scheme);
+ if(!port && (flags & CURLU_DEFAULT_PORT)) {
+ /* there's no stored port number, but asked to deliver
+ a default one for the scheme */
+ if(h) {
+ msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
+ port = portbuf;
}
}
+ else if(port) {
+ /* there is a stored port number, but asked to inhibit if it matches
+ the default one for the scheme */
+ if(h && (h->defport == u->portnum) &&
+ (flags & CURLU_NO_DEFAULT_PORT))
+ port = NULL;
+ }
+
if(h && !(h->flags & PROTOPT_URLOPTIONS))
options = NULL;
@@ -1331,7 +1374,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
default:
return CURLUE_UNKNOWN_PART;
}
- if(storep) {
+ DEBUGASSERT(storep);
+ {
const char *newp = part;
size_t nalloc = strlen(part);
@@ -1423,9 +1467,14 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
}
if(what == CURLUPART_HOST) {
- if(hostname_check(u, (char *)newp)) {
- free((char *)newp);
- return CURLUE_MALFORMED_INPUT;
+ if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
+ /* Skip hostname check, it's allowed to be empty. */
+ }
+ else {
+ if(hostname_check(u, (char *)newp)) {
+ free((char *)newp);
+ return CURLUE_MALFORMED_INPUT;
+ }
}
}