diff options
author | Gregory P. Smith <greg@krypto.org> | 2019-05-01 02:12:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-01 02:12:21 (GMT) |
commit | c4e671eec20dfcb29b18596a89ef075f826c9f96 (patch) | |
tree | ed97dd046a1467e029caed8416ed6de7182ef53a /Lib/http | |
parent | 5f38b8407b071acd96da2c8cde411d0e26967735 (diff) | |
download | cpython-c4e671eec20dfcb29b18596a89ef075f826c9f96.zip cpython-c4e671eec20dfcb29b18596a89ef075f826c9f96.tar.gz cpython-c4e671eec20dfcb29b18596a89ef075f826c9f96.tar.bz2 |
bpo-30458: Disallow control chars in http URLs. (GH-12755)
Disallow control characters in HTTP URLs passed to urllib.urlopen. This addresses a potential security problem: in applications that do not sanity-check their URLs, HTTP request headers could otherwise be injected through the URL.
Diffstat (limited to 'Lib/http')
-rw-r--r-- | Lib/http/client.py | 14 |
1 file changed, 14 insertions, 0 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index 5a22252..99d6a68 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -137,6 +137,16 @@ _MAXHEADERS = 100 _is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search +# These characters are not allowed within HTTP URL paths. +# See https://tools.ietf.org/html/rfc3986#section-3.3 and the +# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition. +# Prevents CVE-2019-9740. Includes control characters such as \r\n. +# We don't restrict chars above \x7f as putrequest() limits us to ASCII. +_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]') +# Arguably only these _should_ allowed: +# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$") +# We are more lenient for assumed real world compatibility purposes. + # We always set the Content-Length header for these methods because some # servers will otherwise respond with a 411 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} @@ -1079,6 +1089,10 @@ class HTTPConnection: self._method = method if not url: url = '/' + # Prevent CVE-2019-9740. + if match := _contains_disallowed_url_pchar_re.search(url): + raise ValueError(f"URL can't contain control characters. {url!r} " + f"(found at least {match.group()!r})") request = '%s %s %s' % (method, url, self._http_vsn_str) # Non-ASCII characters should have been eliminated earlier |