diff options
Diffstat (limited to 'Lib/urllib/parse.py')
-rw-r--r-- | Lib/urllib/parse.py | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 36fd8fe..f0d9d4d 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -77,6 +77,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz' '0123456789' '+-.') +# Unsafe bytes to be removed per WHATWG spec +_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] + # XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -414,6 +417,11 @@ def _checknetloc(netloc): raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") +def _remove_unsafe_bytes_from_url(url): + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + return url + def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> @@ -421,6 +429,8 @@ def urlsplit(url, scheme='', allow_fragments=True): Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) + url = _remove_unsafe_bytes_from_url(url) + scheme = _remove_unsafe_bytes_from_url(scheme) allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) |