diff options
author | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-18 02:27:10 (GMT) |
---|---|---|
committer | Senthil Kumaran <orsenthil@gmail.com> | 2010-07-18 02:27:10 (GMT) |
commit | 5dba6dfe6a8224a54c3a4fd5cc2393a734718a77 (patch) | |
tree | fe2743a3d25ca6bae5a1549477874eb390958489 /Lib/urllib.py | |
parent | 5d10d33cd549e93079b7c4cf5f15eb0529c9bd41 (diff) | |
download | cpython-5dba6dfe6a8224a54c3a4fd5cc2393a734718a77.zip cpython-5dba6dfe6a8224a54c3a4fd5cc2393a734718a77.tar.gz cpython-5dba6dfe6a8224a54c3a4fd5cc2393a734718a77.tar.bz2 |
Fix Issue1712522: urllib.quote now supports Unicode. When encoding is
needed, the defaults are encoding='utf-8' and errors='strict'.
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r-- | Lib/urllib.py | 26 |
1 files changed, 22 insertions, 4 deletions
```diff
diff --git a/Lib/urllib.py b/Lib/urllib.py
index fb98421..e32a771 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -1193,7 +1193,7 @@ for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
     _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
 _safe_quoters = {}
 
-def quote(s, safe='/'):
+def quote(s, safe='/', encoding=None, errors=None):
     """quote('abc def') -> 'abc%20def'
 
     Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1213,10 +1213,28 @@ def quote(s, safe='/'):
     is reserved, but in typical usage the quote function is being
     called on a path where the existing slash characters are used as
     reserved characters.
+
+    string and safe may be either str or unicode objects.
+
+    The optional encoding and errors parameters specify how to deal with the
+    non-ASCII characters, as accepted by the unicode.encode method.
+    By default, encoding='utf-8' (characters are encoded with UTF-8), and
+    errors='strict' (unsupported characters raise a UnicodeEncodeError).
     """
     # fastpath
     if not s:
         return s
+
+    if encoding is not None or isinstance(s, unicode):
+        if encoding is None:
+            encoding = 'utf-8'
+        if errors is None:
+            errors = 'strict'
+        s = s.encode(encoding, errors)
+    if isinstance(safe, unicode):
+        # Normalize 'safe' by converting to str and removing non-ASCII chars
+        safe = safe.encode('ascii', 'ignore')
+
     cachekey = (safe, always_safe)
     try:
         (quoter, safe) = _safe_quoters[cachekey]
@@ -1230,12 +1248,12 @@ def quote(s, safe='/'):
         return s
     return ''.join(map(quoter, s))
 
-def quote_plus(s, safe=''):
+def quote_plus(s, safe='', encoding=None, errors=None):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
-        s = quote(s, safe + ' ')
+        s = quote(s, safe + ' ', encoding, errors)
         return s.replace(' ', '+')
-    return quote(s, safe)
+    return quote(s, safe, encoding, errors)
 
 def urlencode(query, doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
```