diff options
Diffstat (limited to 'lib/urlapi.c')
-rw-r--r-- | lib/urlapi.c | 237 |
1 files changed, 143 insertions, 94 deletions
diff --git a/lib/urlapi.c b/lib/urlapi.c index d07e4f5..506e244 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. + * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -29,6 +29,7 @@ #include "url.h" #include "escape.h" #include "curl_ctype.h" +#include "inet_pton.h" /* The last 3 #include files should be in this order */ #include "curl_printf.h" @@ -63,6 +64,7 @@ struct Curl_URL { char *fragment; char *scratch; /* temporary scratch area */ + char *temppath; /* temporary path pointer */ long portnum; /* the numerical version */ }; @@ -81,6 +83,7 @@ static void free_urlhandle(struct Curl_URL *u) free(u->query); free(u->fragment); free(u->scratch); + free(u->temppath); } /* move the full contents of one handle onto another and @@ -350,7 +353,7 @@ static char *concat_url(const char *base, const char *relurl) else { /* We got a new absolute path for this server */ - if((relurl[0] == '/') && (relurl[1] == '/')) { + if(relurl[1] == '/') { /* the new URL starts with //, just keep the protocol part from the original one */ *protsep = 0; @@ -425,7 +428,6 @@ static char *concat_url(const char *base, const char *relurl) * */ static CURLUcode parse_hostname_login(struct Curl_URL *u, - const struct Curl_handler *h, char **hostname, unsigned int flags) { @@ -434,6 +436,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, char *userp = NULL; char *passwdp = NULL; char *optionsp = NULL; + const struct Curl_handler *h = NULL; /* At this point, we're hoping all the other special cases have * been taken care of, so conn->host.name is at most @@ -453,6 +456,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, * ftp://user:password@ftp.my.site:8021/README */ *hostname = ++ptr; + /* if this is a known scheme, get some details */ + if(u->scheme) + h = Curl_builtin_scheme(u->scheme); + /* We could use the login information in the URL so extract it. Only parse options if the handler says we should. Note that 'h' might be NULL! */ ccode = Curl_parse_login_details(login, ptr - login - 1, @@ -568,7 +575,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) } /* scan for byte values < 31 or 127 */ -static CURLUcode junkscan(char *part) +static CURLUcode junkscan(const char *part) { if(part) { static const char badbytes[]={ @@ -591,20 +598,26 @@ static CURLUcode junkscan(char *part) static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) { - const char *l = NULL; /* accepted characters */ size_t len; size_t hlen = strlen(hostname); if(hostname[0] == '[') { +#ifdef ENABLE_IPV6 + char dest[16]; /* fits a binary IPv6 address */ +#endif + const char *l = "0123456789abcdefABCDEF:."; + if(hlen < 5) /* '[::1]' is the shortest possible valid string */ + return CURLUE_MALFORMED_INPUT; hostname++; - l = "0123456789abcdefABCDEF::."; hlen -= 2; - } - if(l) { + if(hostname[hlen] != ']') + return CURLUE_MALFORMED_INPUT; + /* only valid letters are ok */ len = strspn(hostname, l); if(hlen != len) { + hlen = len; if(hostname[len] == '%') { /* this could now be '%[zone id]' */ char zoneid[16]; @@ -628,6 +641,12 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) return CURLUE_MALFORMED_INPUT; /* hostname is fine */ } +#ifdef ENABLE_IPV6 + hostname[hlen] = 0; /* end the address there */ + if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) + return CURLUE_MALFORMED_INPUT; + hostname[hlen] = ']'; /* restore ending bracket */ +#endif } else { /* letters from the second string is not ok */ @@ -653,10 +672,9 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) CURLUcode result; bool url_has_scheme = FALSE; char schemebuf[MAX_SCHEME_LEN + 1]; - char *schemep = NULL; + const char *schemep = NULL; size_t schemelen = 0; size_t urllen; - const struct Curl_handler *h = NULL; if(!url) return CURLUE_MALFORMED_INPUT; @@ -775,6 +793,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(junkscan(schemep)) return CURLUE_MALFORMED_INPUT; + } else { /* no scheme! */ @@ -782,7 +801,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) return CURLUE_MALFORMED_INPUT; if(flags & CURLU_DEFAULT_SCHEME) - schemep = (char *) DEFAULT_SCHEME; + schemep = DEFAULT_SCHEME; /* * The URL was badly formatted, let's try without scheme specified. @@ -795,74 +814,83 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) p++; len = p - hostp; - if(!len) - return CURLUE_MALFORMED_INPUT; - - memcpy(hostname, hostp, len); - hostname[len] = 0; - - if((flags & CURLU_GUESS_SCHEME) && !schemep) { - /* legacy curl-style guess based on host name */ - if(checkprefix("ftp.", hostname)) - schemep = (char *)"ftp"; - else if(checkprefix("dict.", hostname)) - schemep = (char *)"dict"; - else if(checkprefix("ldap.", hostname)) - schemep = (char *)"ldap"; - else if(checkprefix("imap.", hostname)) - schemep = (char *)"imap"; - else if(checkprefix("smtp.", hostname)) - schemep = (char *)"smtp"; - else if(checkprefix("pop3.", hostname)) - schemep = (char *)"pop3"; - else - schemep = (char *)"http"; + if(len) { + memcpy(hostname, hostp, len); + hostname[len] = 0; + } + else { + if(!(flags & CURLU_NO_AUTHORITY)) + return CURLUE_MALFORMED_INPUT; } len = strlen(p); memcpy(path, p, len); path[len] = 0; - u->scheme = strdup(schemep); - if(!u->scheme) - return CURLUE_OUT_OF_MEMORY; + if(schemep) { + u->scheme = strdup(schemep); + if(!u->scheme) + return CURLUE_OUT_OF_MEMORY; + } } - /* if this is a known scheme, get some details */ - h = Curl_builtin_scheme(u->scheme); - if(junkscan(path)) return CURLUE_MALFORMED_INPUT; - query = strchr(path, '?'); - if(query) - *query++ = 0; + if((flags & CURLU_URLENCODE) && path[0]) { + /* worst case output length is 3x the original! */ + char *newp = malloc(strlen(path) * 3); + if(!newp) + return CURLUE_OUT_OF_MEMORY; + path_alloced = TRUE; + strcpy_url(newp, path, TRUE); /* consider it relative */ + u->temppath = path = newp; + } - fragment = strchr(query?query:path, '#'); - if(fragment) + fragment = strchr(path, '#'); + if(fragment) { *fragment++ = 0; + if(fragment[0]) { + u->fragment = strdup(fragment); + if(!u->fragment) + return CURLUE_OUT_OF_MEMORY; + } + } + + query = strchr(path, '?'); + if(query) { + *query++ = 0; + /* done even if the query part is a blank string */ + u->query = strdup(query); + if(!u->query) + return CURLUE_OUT_OF_MEMORY; + } if(!path[0]) - /* if there's no path set, unset */ + /* if there's no path left set, unset */ path = NULL; - else if(!(flags & CURLU_PATH_AS_IS)) { - /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ - char *newp = Curl_dedotdotify(path); - if(!newp) - return CURLUE_OUT_OF_MEMORY; + else { + if(!(flags & CURLU_PATH_AS_IS)) { + /* remove ../ and ./ sequences according to RFC3986 */ + char *newp = Curl_dedotdotify(path); + if(!newp) + return CURLUE_OUT_OF_MEMORY; - if(strcmp(newp, path)) { - /* if we got a new version */ - path = newp; - path_alloced = TRUE; + if(strcmp(newp, path)) { + /* if we got a new version */ + if(path_alloced) + Curl_safefree(u->temppath); + u->temppath = path = newp; + path_alloced = TRUE; + } + else + free(newp); } - else - free(newp); - } - if(path) { + u->path = path_alloced?path:strdup(path); if(!u->path) return CURLUE_OUT_OF_MEMORY; + u->temppath = NULL; /* used now */ } if(hostname) { @@ -872,7 +900,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(junkscan(hostname)) return CURLUE_MALFORMED_INPUT; - result = parse_hostname_login(u, h, &hostname, flags); + result = parse_hostname_login(u, &hostname, flags); if(result) return result; @@ -880,28 +908,44 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(result) return result; - result = hostname_check(u, hostname); - if(result) - return result; + if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) { + /* Skip hostname check, it's allowed to be empty. */ + } + else { + result = hostname_check(u, hostname); + if(result) + return result; + } u->host = strdup(hostname); if(!u->host) return CURLUE_OUT_OF_MEMORY; - } - if(query) { - u->query = strdup(query); - if(!u->query) - return CURLUE_OUT_OF_MEMORY; - } - if(fragment && fragment[0]) { - u->fragment = strdup(fragment); - if(!u->fragment) - return CURLUE_OUT_OF_MEMORY; + if((flags & CURLU_GUESS_SCHEME) && !schemep) { + /* legacy curl-style guess based on host name */ + if(checkprefix("ftp.", hostname)) + schemep = "ftp"; + else if(checkprefix("dict.", hostname)) + schemep = "dict"; + else if(checkprefix("ldap.", hostname)) + schemep = "ldap"; + else if(checkprefix("imap.", hostname)) + schemep = "imap"; + else if(checkprefix("smtp.", hostname)) + schemep = "smtp"; + else if(checkprefix("pop3.", hostname)) + schemep = "pop3"; + else + schemep = "http"; + + u->scheme = strdup(schemep); + if(!u->scheme) + return CURLUE_OUT_OF_MEMORY; + } } - free(u->scratch); - u->scratch = NULL; + Curl_safefree(u->scratch); + Curl_safefree(u->temppath); return CURLUE_OK; } @@ -1066,24 +1110,23 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, else return CURLUE_NO_SCHEME; - if(scheme) { - h = Curl_builtin_scheme(scheme); - if(!port && (flags & CURLU_DEFAULT_PORT)) { - /* there's no stored port number, but asked to deliver - a default one for the scheme */ - if(h) { - msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); - port = portbuf; - } - } - else if(port) { - /* there is a stored port number, but asked to inhibit if it matches - the default one for the scheme */ - if(h && (h->defport == u->portnum) && - (flags & CURLU_NO_DEFAULT_PORT)) - port = NULL; + h = Curl_builtin_scheme(scheme); + if(!port && (flags & CURLU_DEFAULT_PORT)) { + /* there's no stored port number, but asked to deliver + a default one for the scheme */ + if(h) { + msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); + port = portbuf; } } + else if(port) { + /* there is a stored port number, but asked to inhibit if it matches + the default one for the scheme */ + if(h && (h->defport == u->portnum) && + (flags & CURLU_NO_DEFAULT_PORT)) + port = NULL; + } + if(h && !(h->flags & PROTOPT_URLOPTIONS)) options = NULL; @@ -1331,7 +1374,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, default: return CURLUE_UNKNOWN_PART; } - if(storep) { + DEBUGASSERT(storep); + { const char *newp = part; size_t nalloc = strlen(part); @@ -1423,9 +1467,14 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, } if(what == CURLUPART_HOST) { - if(hostname_check(u, (char *)newp)) { - free((char *)newp); - return CURLUE_MALFORMED_INPUT; + if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) { + /* Skip hostname check, it's allowed to be empty. */ + } + else { + if(hostname_check(u, (char *)newp)) { + free((char *)newp); + return CURLUE_MALFORMED_INPUT; + } } } |