summaryrefslogtreecommitdiffstats
path: root/Lib/urllib/parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib/parse.py')
-rw-r--r--Lib/urllib/parse.py10
1 files changed, 10 insertions, 0 deletions
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 36fd8fe..f0d9d4d 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -77,6 +77,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'0123456789'
'+-.')
+# Unsafe bytes to be removed per WHATWG spec
+_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
+
# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}
@@ -414,6 +417,11 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
+def _remove_unsafe_bytes_from_url(url):
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+ url = url.replace(b, "")
+ return url
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -421,6 +429,8 @@ def urlsplit(url, scheme='', allow_fragments=True):
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ url = _remove_unsafe_bytes_from_url(url)
+ scheme = _remove_unsafe_bytes_from_url(scheme)
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)