diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2021-05-05 17:25:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-05 17:25:29 (GMT) |
commit | 515a7bc4e13645d0945b46a8e1d9102b918cd407 (patch) | |
tree | bca9cf860500a30cc5d42806342f9c4420c17ffe /Lib/urllib/parse.py | |
parent | 44f6b9aa49d562ab7c67952442b8348346b24141 (diff) | |
download | cpython-515a7bc4e13645d0945b46a8e1d9102b918cd407.zip cpython-515a7bc4e13645d0945b46a8e1d9102b918cd407.tar.gz cpython-515a7bc4e13645d0945b46a8e1d9102b918cd407.tar.bz2 |
[3.8] bpo-43882 - urllib.parse should sanitize urls containing ASCII newline and tabs. (GH-25595) (#25726)
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
(cherry picked from commit 76cd81d60310d65d01f9d7b48a8985d8ab89c8b4)
Co-authored-by: Senthil Kumaran <senthil@uthcode.com>
Co-authored-by: Senthil Kumaran <skumaran@gatech.edu>
Diffstat (limited to 'Lib/urllib/parse.py')
-rw-r--r-- | Lib/urllib/parse.py | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 36fd8fe..f0d9d4d 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -77,6 +77,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz' '0123456789' '+-.') +# Unsafe bytes to be removed per WHATWG spec +_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] + # XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -414,6 +417,11 @@ def _checknetloc(netloc): raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") +def _remove_unsafe_bytes_from_url(url): + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + return url + def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> @@ -421,6 +429,8 @@ def urlsplit(url, scheme='', allow_fragments=True): Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) + url = _remove_unsafe_bytes_from_url(url) + scheme = _remove_unsafe_bytes_from_url(scheme) allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) |