diff options
Diffstat (limited to 'Utilities/cmcurl/lib/urlapi.c')
-rw-r--r-- | Utilities/cmcurl/lib/urlapi.c | 904 |
1 files changed, 548 insertions, 356 deletions
diff --git a/Utilities/cmcurl/lib/urlapi.c b/Utilities/cmcurl/lib/urlapi.c index 2a36de6..7dac81c 100644 --- a/Utilities/cmcurl/lib/urlapi.c +++ b/Utilities/cmcurl/lib/urlapi.c @@ -18,6 +18,8 @@ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * + * SPDX-License-Identifier: curl + * ***************************************************************************/ #include "curl_setup.h" @@ -25,12 +27,12 @@ #include "urldata.h" #include "urlapi-int.h" #include "strcase.h" -#include "dotdot.h" #include "url.h" #include "escape.h" #include "curl_ctype.h" #include "inet_pton.h" #include "inet_ntop.h" +#include "strdup.h" /* The last 3 #include files should be in this order */ #include "curl_printf.h" @@ -66,9 +68,6 @@ struct Curl_URL { char *path; char *query; char *fragment; - - char *scratch; /* temporary scratch area */ - char *temppath; /* temporary path pointer */ long portnum; /* the numerical version */ }; @@ -86,8 +85,6 @@ static void free_urlhandle(struct Curl_URL *u) free(u->path); free(u->query); free(u->fragment); - free(u->scratch); - free(u->temppath); } /* @@ -119,91 +116,50 @@ static const char *find_host_sep(const char *url) } /* - * Decide in an encoding-independent manner whether a character in an - * URL must be escaped. The same criterion must be used in strlen_url() - * and strcpy_url(). + * Decide in an encoding-independent manner whether a character in a URL must + * be escaped. This is used in urlencode_str(). */ static bool urlchar_needs_escaping(int c) { return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); } -/* - * strlen_url() returns the length of the given URL if the spaces within the - * URL were properly URL encoded. - * URL encoding should be skipped for host names, otherwise IDN resolution - * will fail. - */ -static size_t strlen_url(const char *url, bool relative) -{ - const unsigned char *ptr; - size_t newlen = 0; - bool left = TRUE; /* left side of the ? */ - const unsigned char *host_sep = (const unsigned char *) url; - - if(!relative) - host_sep = (const unsigned char *) find_host_sep(url); - - for(ptr = (unsigned char *)url; *ptr; ptr++) { - - if(ptr < host_sep) { - ++newlen; - continue; - } - - if(*ptr == ' ') { - if(left) - newlen += 3; - else - newlen++; - continue; - } - - if (*ptr == '?') - left = FALSE; - - if(urlchar_needs_escaping(*ptr)) - newlen += 2; - - newlen++; - } - - return newlen; -} - -/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in - * the source URL accordingly. +/* urlencode_str() writes data into an output dynbuf and URL-encodes the + * spaces in the source URL accordingly. + * * URL encoding should be skipped for host names, otherwise IDN resolution * will fail. */ -static void strcpy_url(char *output, const char *url, bool relative) +static CURLUcode urlencode_str(struct dynbuf *o, const char *url, + size_t len, bool relative, + bool query) { /* we must add this with whitespace-replacing */ - bool left = TRUE; + bool left = !query; const unsigned char *iptr; - char *optr = output; const unsigned char *host_sep = (const unsigned char *) url; if(!relative) host_sep = (const unsigned char *) find_host_sep(url); for(iptr = (unsigned char *)url; /* read from here */ - *iptr; /* until zero byte */ - iptr++) { + len; iptr++, len--) { if(iptr < host_sep) { - *optr++ = *iptr; + if(Curl_dyn_addn(o, iptr, 1)) + return CURLUE_OUT_OF_MEMORY; continue; } if(*iptr == ' ') { if(left) { - *optr++='%'; /* add a '%' */ - *optr++='2'; /* add a '2' */ - *optr++='0'; /* add a '0' */ + if(Curl_dyn_addn(o, "%20", 3)) + return CURLUE_OUT_OF_MEMORY; + } + else { + if(Curl_dyn_addn(o, "+", 1)) + return CURLUE_OUT_OF_MEMORY; } - else - *optr++='+'; /* add a '+' here */ continue; } @@ -211,22 +167,28 @@ static void strcpy_url(char *output, const char *url, bool relative) left = FALSE; if(urlchar_needs_escaping(*iptr)) { - msnprintf(optr, 4, "%%%02x", *iptr); - optr += 3; + if(Curl_dyn_addf(o, "%%%02x", *iptr)) + return CURLUE_OUT_OF_MEMORY; + } + else { + if(Curl_dyn_addn(o, iptr, 1)) + return CURLUE_OUT_OF_MEMORY; } - else - *optr++ = *iptr; } - *optr = 0; /* null-terminate output buffer */ + return CURLUE_OK; } /* - * Returns true if the given URL is absolute (as opposed to relative). Returns - * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must - * be larger than MAX_SCHEME_LEN if buf is set. + * Returns the length of the scheme if the given URL is absolute (as opposed + * to relative). Stores the scheme in the buffer if TRUE and 'buf' is + * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set. + * + * If 'guess_scheme' is TRUE, it means the URL might be provided without + * scheme. */ -bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) +size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, + bool guess_scheme) { int i; DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); @@ -234,8 +196,8 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) if(buf) buf[0] = 0; /* always leave a defined value in buf */ #ifdef WIN32 - if(STARTS_WITH_DRIVE_PREFIX(url)) - return FALSE; + if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url)) + return 0; #endif for(i = 0; i < MAX_SCHEME_LEN; ++i) { char s = url[i]; @@ -248,16 +210,22 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) break; } } - if(i && (url[i] == ':') && (url[i + 1] == '/')) { + if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) { + /* If this does not guess scheme, the scheme always ends with the colon so + that this also detects data: URLs etc. In guessing mode, data: could + be the host name "data" with a specified port number. */ + + /* the length of the scheme is the name part only */ + size_t len = i; if(buf) { buf[i] = 0; while(i--) { - buf[i] = (char)TOLOWER(url[i]); + buf[i] = Curl_raw_tolower(url[i]); } } - return TRUE; + return len; } - return FALSE; + return 0; } /* @@ -265,34 +233,26 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) * URL-encodes any spaces. * The returned pointer must be freed by the caller unless NULL * (returns NULL on out of memory). + * + * Note that this function destroys the 'base' string. */ -static char *concat_url(const char *base, const char *relurl) +static char *concat_url(char *base, const char *relurl) { /*** TRY to append this new path to the old URL to the right of the host part. Oh crap, this is doomed to cause problems in the future... */ - char *newest; + struct dynbuf newest; char *protsep; char *pathsep; - size_t newlen; bool host_changed = FALSE; - const char *useurl = relurl; - size_t urllen; - - /* we must make our own copy of the URL to play with, as it may - point to read-only data */ - char *url_clone = strdup(base); - - if(!url_clone) - return NULL; /* skip out of this NOW */ /* protsep points to the start of the host name */ - protsep = strstr(url_clone, "//"); + protsep = strstr(base, "//"); if(!protsep) - protsep = url_clone; + protsep = base; else protsep += 2; /* pass the slashes */ @@ -385,38 +345,24 @@ static char *concat_url(const char *base, const char *relurl) } } - /* If the new part contains a space, this is a mighty stupid redirect - but we still make an effort to do "right". To the left of a '?' - letter we replace each space with %20 while it is replaced with '+' - on the right side of the '?' letter. - */ - newlen = strlen_url(useurl, !host_changed); - - urllen = strlen(url_clone); - - newest = malloc(urllen + 1 + /* possible slash */ - newlen + 1 /* zero byte */); - - if(!newest) { - free(url_clone); /* don't leak this */ - return NULL; - } + Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH); /* copy over the root url part */ - memcpy(newest, url_clone, urllen); + if(Curl_dyn_add(&newest, base)) + return NULL; /* check if we need to append a slash */ if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) ; - else - newest[urllen++]='/'; + else { + if(Curl_dyn_addn(&newest, "/", 1)) + return NULL; + } /* then append the new piece on the right side */ - strcpy_url(&newest[urllen], useurl, !host_changed); - - free(url_clone); + urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE); - return newest; + return Curl_dyn_ptr(&newest); } /* scan for byte values < 31 or 127 */ @@ -450,7 +396,7 @@ static bool junkscan(const char *part, unsigned int flags) * */ static CURLUcode parse_hostname_login(struct Curl_URL *u, - char **hostname, + struct dynbuf *host, unsigned int flags) { CURLUcode result = CURLUE_OK; @@ -460,27 +406,31 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, char *optionsp = NULL; const struct Curl_handler *h = NULL; - /* At this point, we're hoping all the other special cases have - * been taken care of, so conn->host.name is at most - * [user[:password][;options]]@]hostname + /* At this point, we assume all the other special cases have been taken + * care of, so the host is at most + * + * [user[:password][;options]]@]hostname * * We need somewhere to put the embedded details, so do that first. */ - char *ptr = strchr(*hostname, '@'); - char *login = *hostname; + char *login = Curl_dyn_ptr(host); + char *ptr; + DEBUGASSERT(login); + + ptr = strchr(login, '@'); if(!ptr) goto out; /* We will now try to extract the * possible login information in a string like: * ftp://user:password@ftp.my.site:8021/README */ - *hostname = ++ptr; + ptr++; /* if this is a known scheme, get some details */ if(u->scheme) - h = Curl_builtin_scheme(u->scheme); + h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); /* We could use the login information in the URL so extract it. Only parse options if the handler says we should. Note that 'h' might be NULL! */ @@ -522,6 +472,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, u->options = optionsp; } + /* move the name to the start of the host buffer */ + if(Curl_dyn_tail(host, strlen(ptr))) + return CURLUE_OUT_OF_MEMORY; + return CURLUE_OK; out: @@ -535,13 +489,13 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, return result; } -UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, +UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, bool has_scheme) { char *portptr = NULL; char endbracket; int len; - + char *hostname = Curl_dyn_ptr(host); /* * Find the end of an IPv6 address, either on the ']' ending bracket or * a percent-encoded zone index. @@ -578,6 +532,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, char *rest; long port; char portbuf[7]; + size_t keep = portptr - hostname; /* Browser behavior adaptation. If there's a colon with no digits after, just cut off the name there which makes us ignore the colon and just @@ -586,15 +541,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, Don't do it if the URL has no scheme, to make something that looks like a scheme not work! */ - if(!portptr[1]) { - *portptr = '\0'; + Curl_dyn_setlen(host, keep); + portptr++; + if(!*portptr) return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER; - } - if(!ISDIGIT(portptr[1])) + if(!ISDIGIT(*portptr)) return CURLUE_BAD_PORT_NUMBER; - port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ + port = strtol(portptr, &rest, 10); /* Port number must be decimal */ if(port > 0xffff) return CURLUE_BAD_PORT_NUMBER; @@ -602,7 +557,6 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, if(rest[0]) return CURLUE_BAD_PORT_NUMBER; - *portptr++ = '\0'; /* cut off the name there */ *rest = 0; /* generate a new port number string to get rid of leading zeroes etc */ msnprintf(portbuf, sizeof(portbuf), "%ld", port); @@ -615,12 +569,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, return CURLUE_OK; } -static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) +static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, + size_t hlen) /* length of hostname */ { size_t len; - size_t hlen = strlen(hostname); + DEBUGASSERT(hostname); - if(hostname[0] == '[') { + if(!hostname[0]) + return CURLUE_NO_HOST; + else if(hostname[0] == '[') { const char *l = "0123456789abcdefABCDEF:."; if(hlen < 4) /* '[::]' is the shortest possible valid string */ return CURLUE_BAD_IPV6; @@ -679,13 +636,11 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) } else { /* letters from the second string are not ok */ - len = strcspn(hostname, " \r\n\t/:#?!@"); + len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,"); if(hlen != len) /* hostname with bad content */ return CURLUE_BAD_HOSTNAME; } - if(!hostname[0]) - return CURLUE_NO_HOST; return CURLUE_OK; } @@ -779,79 +734,230 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen) return TRUE; } -/* return strdup'ed version in 'outp', possibly percent decoded */ -static CURLUcode decode_host(char *hostname, char **outp) +/* if necessary, replace the host content with a URL decoded version */ +static CURLUcode decode_host(struct dynbuf *host) { char *per = NULL; - if(hostname[0] != '[') + const char *hostname = Curl_dyn_ptr(host); + if(hostname[0] == '[') /* only decode if not an ipv6 numerical */ - per = strchr(hostname, '%'); - if(!per) { - *outp = strdup(hostname); - if(!*outp) - return CURLUE_OUT_OF_MEMORY; - } + return CURLUE_OK; + per = strchr(hostname, '%'); + if(!per) + /* nothing to decode */ + return CURLUE_OK; else { - /* might be encoded */ + /* encoded */ size_t dlen; - CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL); + char *decoded; + CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen, + REJECT_CTRL); if(result) return CURLUE_BAD_HOSTNAME; + Curl_dyn_reset(host); + result = Curl_dyn_addn(host, decoded, dlen); + free(decoded); + if(result) + return CURLUE_OUT_OF_MEMORY; } return CURLUE_OK; } -static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) +/* + * "Remove Dot Segments" + * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 + */ + +/* + * dedotdotify() + * @unittest: 1395 + * + * This function gets a null-terminated path with dot and dotdot sequences + * passed in and strips them off according to the rules in RFC 3986 section + * 5.2.4. + * + * The function handles a query part ('?' + stuff) appended but it expects + * that fragments ('#' + stuff) have already been cut off. + * + * RETURNS + * + * an allocated dedotdotified output string + */ +UNITTEST char *dedotdotify(const char *input, size_t clen); +UNITTEST char *dedotdotify(const char *input, size_t clen) { - char *path; - bool path_alloced = FALSE; + char *out = malloc(clen + 1); + char *outptr; + const char *orginput = input; + char *queryp; + if(!out) + return NULL; /* out of memory */ + + *out = 0; /* null-terminates, for inputs like "./" */ + outptr = out; + + if(!*input) + /* zero length input string, return that */ + return out; + + /* + * To handle query-parts properly, we must find it and remove it during the + * dotdot-operation and then append it again at the end to the output + * string. + */ + queryp = strchr(input, '?'); + + do { + bool dotdot = TRUE; + if(*input == '.') { + /* A. If the input buffer begins with a prefix of "../" or "./", then + remove that prefix from the input buffer; otherwise, */ + + if(!strncmp("./", input, 2)) { + input += 2; + clen -= 2; + } + else if(!strncmp("../", input, 3)) { + input += 3; + clen -= 3; + } + /* D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, */ + + else if(!strcmp(".", input) || !strcmp("..", input) || + !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) { + *out = 0; + break; + } + else + dotdot = FALSE; + } + else if(*input == '/') { + /* B. if the input buffer begins with a prefix of "/./" or "/.", where + "." is a complete path segment, then replace that prefix with "/" in + the input buffer; otherwise, */ + if(!strncmp("/./", input, 3)) { + input += 2; + clen -= 2; + } + else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) { + *outptr++ = '/'; + *outptr = 0; + break; + } + + /* C. if the input buffer begins with a prefix of "/../" or "/..", + where ".." is a complete path segment, then replace that prefix with + "/" in the input buffer and remove the last segment and its + preceding "/" (if any) from the output buffer; otherwise, */ + + else if(!strncmp("/../", input, 4)) { + input += 3; + clen -= 3; + /* remove the last segment from the output buffer */ + while(outptr > out) { + outptr--; + if(*outptr == '/') + break; + } + *outptr = 0; /* null-terminate where it stops */ + } + else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) { + /* remove the last segment from the output buffer */ + while(outptr > out) { + outptr--; + if(*outptr == '/') + break; + } + *outptr++ = '/'; + *outptr = 0; /* null-terminate where it stops */ + break; + } + else + dotdot = FALSE; + } + else + dotdot = FALSE; + + if(!dotdot) { + /* E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if any) and + any subsequent characters up to, but not including, the next "/" + character or the end of the input buffer. */ + + do { + *outptr++ = *input++; + clen--; + } while(*input && (*input != '/') && (*input != '?')); + *outptr = 0; + } + + /* continue until end of input string OR, if there is a terminating + query part, stop there */ + } while(*input && (!queryp || (input < queryp))); + + if(queryp) { + size_t qlen; + /* There was a query part, append that to the output. */ + size_t oindex = queryp - orginput; + qlen = strlen(&orginput[oindex]); + memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */ + } + + return out; +} + +static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) +{ + const char *path; + size_t pathlen; bool uncpath = FALSE; - char *hostname; char *query = NULL; char *fragment = NULL; - CURLUcode result; - bool url_has_scheme = FALSE; char schemebuf[MAX_SCHEME_LEN + 1]; const char *schemep = NULL; size_t schemelen = 0; size_t urllen; + CURLUcode result = CURLUE_OK; + size_t fraglen = 0; + struct dynbuf host; DEBUGASSERT(url); + Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH); + /************************************************************* * Parse the URL. ************************************************************/ /* allocate scratch area */ urllen = strlen(url); - if(urllen > CURL_MAX_INPUT_LENGTH) + if(urllen > CURL_MAX_INPUT_LENGTH) { /* excessive input length */ - return CURLUE_MALFORMED_INPUT; - - path = u->scratch = malloc(urllen * 2 + 2); - if(!path) - return CURLUE_OUT_OF_MEMORY; - - hostname = &path[urllen + 1]; - hostname[0] = 0; - - if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) { - url_has_scheme = TRUE; - schemelen = strlen(schemebuf); + result = CURLUE_MALFORMED_INPUT; + goto fail; } + schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf), + flags & (CURLU_GUESS_SCHEME| + CURLU_DEFAULT_SCHEME)); + /* handle the file: scheme */ - if(url_has_scheme && !strcmp(schemebuf, "file")) { - if(urllen <= 6) + if(schemelen && !strcmp(schemebuf, "file")) { + if(urllen <= 6) { /* file:/ is not enough to actually be a complete file: URL */ - return CURLUE_BAD_FILE_URL; + result = CURLUE_BAD_FILE_URL; + goto fail; + } /* path has been allocated large enough to hold this */ - strcpy(path, &url[5]); + path = (char *)&url[5]; - u->scheme = strdup("file"); - if(!u->scheme) - return CURLUE_OUT_OF_MEMORY; + schemep = u->scheme = strdup("file"); + if(!u->scheme) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } /* Extra handling URLs with an authority component (i.e. that start with * "file://") @@ -861,7 +967,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) */ if(path[0] == '/' && path[1] == '/') { /* swallow the two slashes */ - char *ptr = &path[2]; + const char *ptr = &path[2]; /* * According to RFC 8089, a file: URL can be reliably dereferenced if: @@ -897,13 +1003,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) chars, and the delimiting slash character must be appended to the host name */ path = strpbrk(ptr, "/\\:*?\"<>|"); - if(!path || *path != '/') - return CURLUE_BAD_FILE_URL; + if(!path || *path != '/') { + result = CURLUE_BAD_FILE_URL; + goto fail; + } len = path - ptr; if(len) { - memcpy(hostname, ptr, len); - hostname[len] = 0; + if(Curl_dyn_addn(&host, ptr, len)) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } uncpath = TRUE; } @@ -911,7 +1021,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) #else /* Invalid file://hostname/, expected localhost or 127.0.0.1 or none */ - return CURLUE_BAD_FILE_URL; + result = CURLUE_BAD_FILE_URL; + goto fail; #endif } } @@ -920,7 +1031,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } if(!uncpath) - hostname = NULL; /* no host for file: URLs by default */ + /* no host for file: URLs by default */ + Curl_dyn_reset(&host); #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) /* Don't allow Windows drive letters when not in Windows. @@ -928,13 +1040,14 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || STARTS_WITH_URL_DRIVE_PREFIX(path)) { /* File drive letters are only accepted in MSDOS/Windows */ - return CURLUE_BAD_FILE_URL; + result = CURLUE_BAD_FILE_URL; + goto fail; } #else /* If the path starts with a slash and a drive letter, ditch the slash */ if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { /* This cannot be done with strcpy, as the memory chunks overlap! */ - memmove(path, &path[1], strlen(&path[1]) + 1); + path++; } #endif @@ -944,32 +1057,39 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) const char *p; const char *hostp; size_t len; - path[0] = 0; - if(url_has_scheme) { + if(schemelen) { int i = 0; p = &url[schemelen + 1]; while(p && (*p == '/') && (i < 4)) { p++; i++; } - if((i < 1) || (i>3)) - /* less than one or more than three slashes */ - return CURLUE_BAD_SLASHES; schemep = schemebuf; - if(!Curl_builtin_scheme(schemep) && - !(flags & CURLU_NON_SUPPORT_SCHEME)) - return CURLUE_UNSUPPORTED_SCHEME; + if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) && + !(flags & CURLU_NON_SUPPORT_SCHEME)) { + result = CURLUE_UNSUPPORTED_SCHEME; + goto fail; + } - if(junkscan(schemep, flags)) - return CURLUE_BAD_SCHEME; + if((i < 1) || (i>3)) { + /* less than one or more than three slashes */ + result = CURLUE_BAD_SLASHES; + goto fail; + } + if(junkscan(schemep, flags)) { + result = CURLUE_BAD_SCHEME; + goto fail; + } } else { /* no scheme! */ - if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) - return CURLUE_BAD_SCHEME; + if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) { + result = CURLUE_BAD_SCHEME; + goto fail; + } if(flags & CURLU_DEFAULT_SCHEME) schemep = DEFAULT_SCHEME; @@ -986,122 +1106,169 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) len = p - hostp; if(len) { - memcpy(hostname, hostp, len); - hostname[len] = 0; + if(Curl_dyn_addn(&host, hostp, len)) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } } else { - if(!(flags & CURLU_NO_AUTHORITY)) - return CURLUE_NO_HOST; + if(!(flags & CURLU_NO_AUTHORITY)) { + result = CURLUE_NO_HOST; + goto fail; + } } - strcpy(path, p); + path = (char *)p; if(schemep) { u->scheme = strdup(schemep); - if(!u->scheme) - return CURLUE_OUT_OF_MEMORY; + if(!u->scheme) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } } } - if((flags & CURLU_URLENCODE) && path[0]) { - /* worst case output length is 3x the original! */ - char *newp = malloc(strlen(path) * 3); - if(!newp) - return CURLUE_OUT_OF_MEMORY; - path_alloced = TRUE; - strcpy_url(newp, path, TRUE); /* consider it relative */ - u->temppath = path = newp; - } - fragment = strchr(path, '#'); if(fragment) { - *fragment++ = 0; - if(junkscan(fragment, flags)) - return CURLUE_BAD_FRAGMENT; - if(fragment[0]) { - u->fragment = strdup(fragment); - if(!u->fragment) - return CURLUE_OUT_OF_MEMORY; + fraglen = strlen(fragment); + if(fraglen > 1) { + /* skip the leading '#' in the copy but include the terminating null */ + u->fragment = Curl_memdup(fragment + 1, fraglen); + if(!u->fragment) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + + if(junkscan(u->fragment, flags)) { + result = CURLUE_BAD_FRAGMENT; + goto fail; + } } } query = strchr(path, '?'); - if(query) { - *query++ = 0; - if(junkscan(query, flags)) - return CURLUE_BAD_QUERY; - /* done even if the query part is a blank string */ - u->query = strdup(query); - if(!u->query) - return CURLUE_OUT_OF_MEMORY; - } + if(query && (!fragment || (query < fragment))) { + size_t qlen = strlen(query) - fraglen; /* includes '?' */ + pathlen = strlen(path) - qlen - fraglen; + if(qlen > 1) { + if(qlen && (flags & CURLU_URLENCODE)) { + struct dynbuf enc; + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + /* skip the leading question mark */ + if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + u->query = Curl_dyn_ptr(&enc); + } + else { + u->query = Curl_memdup(query + 1, qlen); + if(!u->query) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + u->query[qlen - 1] = 0; + } - if(junkscan(path, flags)) - return CURLUE_BAD_PATH; + if(junkscan(u->query, flags)) { + result = CURLUE_BAD_QUERY; + goto fail; + } + } + else { + /* single byte query */ + u->query = strdup(""); + if(!u->query) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + } + } + else + pathlen = strlen(path) - fraglen; + + if(pathlen && (flags & CURLU_URLENCODE)) { + struct dynbuf enc; + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + pathlen = Curl_dyn_len(&enc); + path = u->path = Curl_dyn_ptr(&enc); + } - if(!path[0]) - /* if there's no path left set, unset */ + if(!pathlen) { + /* there is no path left, unset */ path = NULL; + } else { + if(!u->path) { + u->path = Curl_memdup(path, pathlen + 1); + if(!u->path) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + u->path[pathlen] = 0; + path = u->path; + } + else if(flags & CURLU_URLENCODE) + /* it might have encoded more than just the path so cut it */ + u->path[pathlen] = 0; + + if(junkscan(u->path, flags)) { + result = CURLUE_BAD_PATH; + goto fail; + } + if(!(flags & CURLU_PATH_AS_IS)) { /* remove ../ and ./ sequences according to RFC3986 */ - char *newp = Curl_dedotdotify(path); - if(!newp) - return CURLUE_OUT_OF_MEMORY; - - if(strcmp(newp, path)) { - /* if we got a new version */ - if(path_alloced) - Curl_safefree(u->temppath); - u->temppath = path = newp; - path_alloced = TRUE; + char *newp = dedotdotify((char *)path, pathlen); + if(!newp) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; } - else - free(newp); + free(u->path); + u->path = newp; } - - u->path = path_alloced?path:strdup(path); - if(!u->path) - return CURLUE_OUT_OF_MEMORY; - u->temppath = NULL; /* used now */ } - if(hostname) { + if(Curl_dyn_len(&host)) { char normalized_ipv4[sizeof("255.255.255.255") + 1]; /* * Parse the login details and strip them out of the host name. */ - result = parse_hostname_login(u, &hostname, flags); + result = parse_hostname_login(u, &host, flags); + if(!result) + result = Curl_parse_port(u, &host, schemelen); if(result) - return result; + goto fail; - result = Curl_parse_port(u, hostname, url_has_scheme); - if(result) - return result; - - if(junkscan(hostname, flags)) - return CURLUE_BAD_HOSTNAME; + if(junkscan(Curl_dyn_ptr(&host), flags)) { + result = CURLUE_BAD_HOSTNAME; + goto fail; + } - if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) { - /* Skip hostname check, it's allowed to be empty. */ - u->host = strdup(""); + if(ipv4_normalize(Curl_dyn_ptr(&host), + normalized_ipv4, sizeof(normalized_ipv4))) { + Curl_dyn_reset(&host); + if(Curl_dyn_add(&host, normalized_ipv4)) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } } else { - if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4))) - u->host = strdup(normalized_ipv4); - else { - result = decode_host(hostname, &u->host); - if(result) - return result; - result = hostname_check(u, u->host); - if(result) - return result; - } + result = decode_host(&host); + if(!result) + result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host)); + if(result) + goto fail; } - if(!u->host) - return CURLUE_OUT_OF_MEMORY; + if((flags & CURLU_GUESS_SCHEME) && !schemep) { + const char *hostname = Curl_dyn_ptr(&host); /* legacy curl-style guess based on host name */ if(checkprefix("ftp.", hostname)) schemep = "ftp"; @@ -1119,27 +1286,26 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) schemep = "http"; u->scheme = strdup(schemep); - if(!u->scheme) - return CURLUE_OUT_OF_MEMORY; + if(!u->scheme) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; + } + } + } + else if(flags & CURLU_NO_AUTHORITY) { + /* allowed to be empty. */ + if(Curl_dyn_add(&host, "")) { + result = CURLUE_OUT_OF_MEMORY; + goto fail; } } - Curl_safefree(u->scratch); - Curl_safefree(u->temppath); - - return CURLUE_OK; -} + u->host = Curl_dyn_ptr(&host); -/* - * Parse the URL and set the relevant members of the Curl_URL struct. - */ -static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) -{ - CURLUcode result = seturl(url, u, flags); - if(result) { - free_urlhandle(u); - memset(u, 0, sizeof(struct Curl_URL)); - } + return result; + fail: + Curl_dyn_free(&host); + free_urlhandle(u); return result; } @@ -1157,8 +1323,6 @@ static CURLUcode parseurl_and_replace(const char *url, CURLU *u, free_urlhandle(u); *u = tmpurl; } - else - free_urlhandle(&tmpurl); return result; } @@ -1257,7 +1421,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, /* there's no stored port number, but asked to deliver a default one for the scheme */ const struct Curl_handler *h = - Curl_builtin_scheme(u->scheme); + Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); if(h) { msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); ptr = portbuf; @@ -1267,7 +1431,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, /* there is a stored port number, but ask to inhibit if it matches the default one for the scheme */ const struct Curl_handler *h = - Curl_builtin_scheme(u->scheme); + Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED); if(h && (h->defport == u->portnum) && (flags & CURLU_NO_DEFAULT_PORT)) ptr = NULL; @@ -1313,7 +1477,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, else return CURLUE_NO_SCHEME; - h = Curl_builtin_scheme(scheme); + h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED); if(!port && (flags & CURLU_DEFAULT_PORT)) { /* there's no stored port number, but asked to deliver a default one for the scheme */ @@ -1336,14 +1500,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, if(u->host[0] == '[') { if(u->zoneid) { /* make it '[ host %25 zoneid ]' */ + struct dynbuf enc; size_t hostlen = strlen(u->host); - size_t alen = hostlen + 3 + strlen(u->zoneid) + 1; - allochost = malloc(alen); - if(!allochost) + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host, + u->zoneid)) return CURLUE_OUT_OF_MEMORY; - memcpy(allochost, u->host, hostlen - 1); - msnprintf(&allochost[hostlen - 1], alen - hostlen + 1, - "%%25%s]", u->zoneid); + allochost = Curl_dyn_ptr(&enc); } } else if(urlencode) { @@ -1354,32 +1517,32 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, else { /* only encode '%' in output host name */ char *host = u->host; - size_t pcount = 0; + bool percent = FALSE; /* first, count number of percents present in the name */ while(*host) { - if(*host == '%') - pcount++; + if(*host == '%') { + percent = TRUE; + break; + } host++; } - /* if there were percents, encode the host name */ - if(pcount) { - size_t hostlen = strlen(u->host); - size_t alen = hostlen + 2 * pcount + 1; - char *o = allochost = malloc(alen); - if(!allochost) - return CURLUE_OUT_OF_MEMORY; - + /* if there were percent(s), encode the host name */ + if(percent) { + struct dynbuf enc; + CURLcode result; + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); host = u->host; while(*host) { - if(*host == '%') { - memcpy(o, "%25", 3); - o += 3; - host++; - continue; - } - *o++ = *host++; + if(*host == '%') + result = Curl_dyn_addn(&enc, "%25", 3); + else + result = Curl_dyn_addn(&enc, host, 1); + if(result) + return CURLUE_OUT_OF_MEMORY; + host++; } - *o = '\0'; + free(u->host); + u->host = Curl_dyn_ptr(&enc); } } @@ -1412,13 +1575,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, break; } if(ptr) { - *part = strdup(ptr); + size_t partlen = strlen(ptr); + size_t i = 0; + *part = Curl_memdup(ptr, partlen + 1); if(!*part) return CURLUE_OUT_OF_MEMORY; if(plusdecode) { /* convert + to space */ - char *plus; - for(plus = *part; *plus; ++plus) { + char *plus = *part; + for(i = 0; i < partlen; ++plus, i++) { if(*plus == '+') *plus = ' '; } @@ -1435,7 +1600,18 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, return CURLUE_URLDECODE; } *part = decoded; + partlen = dlen; } + if(urlencode) { + struct dynbuf enc; + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + if(urlencode_str(&enc, *part, partlen, TRUE, + what == CURLUPART_QUERY)) + return CURLUE_OUT_OF_MEMORY; + free(*part); + *part = Curl_dyn_ptr(&enc); + } + return CURLUE_OK; } else @@ -1497,6 +1673,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, if(storep && *storep) { Curl_safefree(*storep); } + else if(!storep) { + free_urlhandle(u); + memset(u, 0, sizeof(struct Curl_URL)); + } return CURLUE_OK; } @@ -1507,7 +1687,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, return CURLUE_BAD_SCHEME; if(!(flags & CURLU_NON_SUPPORT_SCHEME) && /* verify that it is a fine scheme */ - !Curl_builtin_scheme(part)) + !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED)) return CURLUE_UNSUPPORTED_SCHEME; storep = &u->scheme; urlencode = FALSE; /* never */ @@ -1573,7 +1753,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, /* if the new thing is absolute or the old one is not * (we could not get an absolute url in 'oldurl'), * then replace the existing with the new. */ - if(Curl_is_absolute_url(part, NULL, 0) + if(Curl_is_absolute_url(part, NULL, 0, + flags & (CURLU_GUESS_SCHEME| + CURLU_DEFAULT_SCHEME)) || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) { return parseurl_and_replace(part, u, flags); } @@ -1603,14 +1785,16 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, if(urlencode) { const unsigned char *i; - char *o; - char *enc = malloc(nalloc * 3 + 1); /* for worst case! */ - if(!enc) - return CURLUE_OUT_OF_MEMORY; - for(i = (const unsigned char *)part, o = enc; *i; i++) { + struct dynbuf enc; + + Curl_dyn_init(&enc, nalloc * 3 + 1); + + for(i = (const unsigned char *)part; *i; i++) { + CURLcode result; if((*i == ' ') && plusencode) { - *o = '+'; - o++; + result = Curl_dyn_addn(&enc, "+", 1); + if(result) + return CURLUE_OUT_OF_MEMORY; } else if(Curl_isunreserved(*i) || ((*i == '/') && urlskipslash) || @@ -1618,16 +1802,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, if((*i == '=') && equalsencode) /* only skip the first equals sign */ equalsencode = FALSE; - *o = *i; - o++; + result = Curl_dyn_addn(&enc, i, 1); + if(result) + return CURLUE_OUT_OF_MEMORY; } else { - msnprintf(o, 4, "%%%02x", *i); - o += 3; + result = Curl_dyn_addf(&enc, "%%%02x", *i); + if(result) + return CURLUE_OUT_OF_MEMORY; } } - *o = 0; /* null-terminate */ - newp = enc; + newp = Curl_dyn_ptr(&enc); } else { char *p; @@ -1639,8 +1824,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, /* make sure percent encoded are lower case */ if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && (ISUPPER(p[1]) || ISUPPER(p[2]))) { - p[1] = (char)TOLOWER(p[1]); - p[2] = (char)TOLOWER(p[2]); + p[1] = Curl_raw_tolower(p[1]); + p[2] = Curl_raw_tolower(p[2]); p += 3; } else @@ -1649,34 +1834,41 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, } if(appendquery) { - /* Append the string onto the old query. Add a '&' separator if none is - present at the end of the exsting query already */ + /* Append the 'newp' string onto the old query. Add a '&' separator if + none is present at the end of the existing query already */ + size_t querylen = u->query ? strlen(u->query) : 0; bool addamperand = querylen && (u->query[querylen -1] != '&'); if(querylen) { - size_t newplen = strlen(newp); - char *p = malloc(querylen + addamperand + newplen + 1); - if(!p) { - free((char *)newp); - return CURLUE_OUT_OF_MEMORY; + struct dynbuf enc; + Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); + + if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */ + goto nomem; + + if(addamperand) { + if(Curl_dyn_addn(&enc, "&", 1)) + goto nomem; } - strcpy(p, u->query); /* original query */ - if(addamperand) - p[querylen] = '&'; /* ampersand */ - strcpy(&p[querylen + addamperand], newp); /* new suffix */ + if(Curl_dyn_add(&enc, newp)) + goto nomem; free((char *)newp); free(*storep); - *storep = p; + *storep = Curl_dyn_ptr(&enc); return CURLUE_OK; + nomem: + free((char *)newp); + return CURLUE_OUT_OF_MEMORY; } } if(what == CURLUPART_HOST) { - if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) { + size_t n = strlen(newp); + if(!n && (flags & CURLU_NO_AUTHORITY)) { /* Skip hostname check, it's allowed to be empty. */ } else { - if(hostname_check(u, (char *)newp)) { + if(hostname_check(u, (char *)newp, n)) { free((char *)newp); return CURLUE_BAD_HOSTNAME; } |