From 1c24592b925ba4716f2c0cec10bfe59cef2eed30 Mon Sep 17 00:00:00 2001 From: Florent Xicluna Date: Mon, 9 Aug 2010 22:05:50 +0000 Subject: Merged revisions 81053,81259,81265 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81053 | florent.xicluna | 2010-05-10 21:59:22 +0200 (lun., 10 mai 2010) | 2 lines Add a link on maketrans(). ........ r81259 | florent.xicluna | 2010-05-17 12:39:07 +0200 (lun., 17 mai 2010) | 2 lines Slight style cleanup. ........ r81265 | florent.xicluna | 2010-05-17 15:35:09 +0200 (lun., 17 mai 2010) | 2 lines Issue #1285086: Speed up urllib.quote and urllib.unquote for simple cases. ........ --- Doc/library/stdtypes.rst | 6 ++--- Lib/urllib.py | 59 ++++++++++++++++++++++++++++-------------------- Misc/NEWS | 2 ++ 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 188d3eb..6999a5b 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1186,9 +1186,9 @@ string functions based on regular expressions. mapped through the given translation table, which must be a string of length 256. - You can use the :func:`maketrans` helper function in the :mod:`string` module to - create a translation table. For string objects, set the *table* argument to - ``None`` for translations that only delete characters: + You can use the :func:`~string.maketrans` helper function in the :mod:`string` + module to create a translation table. For string objects, set the *table* + argument to ``None`` for translations that only delete characters: >>> 'read this short text'.translate(None, 'aeiou') 'rd ths shrt txt' diff --git a/Lib/urllib.py b/Lib/urllib.py index 042b4d8..affe92d 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -94,6 +94,8 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): def urlcleanup(): if _urlopener: _urlopener.cleanup() + _safe_quoters.clear() + ftpcache.clear() # check for SSL try: @@ -771,7 +773,7 @@ class FancyURLopener(URLopener): else: return self.open(newurl, data) - def get_user_passwd(self, host, realm, clear_cache = 0): + def get_user_passwd(self, host, realm, clear_cache=0): key = realm + '@' + host.lower() if key in self.auth_cache: if clear_cache: @@ -1163,20 +1165,24 @@ def splitvalue(attr): return attr, None _hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig) +_hextochr = dict((a + b, chr(int(a + b, 16))) + for a in _hexdig for b in _hexdig) def unquote(s): """unquote('abc%20def') -> 'abc def'.""" res = s.split('%') - for i in xrange(1, len(res)): - item = res[i] + # fastpath + if len(res) == 1: + return s + s = res[0] + for item in res[1:]: try: - res[i] = _hextochr[item[:2]] + item[2:] + s += _hextochr[item[:2]] + item[2:] except KeyError: - res[i] = '%' + item + s += '%' + item except UnicodeDecodeError: - res[i] = unichr(int(item[:2], 16)) + item[2:] - return "".join(res) + s += unichr(int(item[:2], 16)) + item[2:] + return s def unquote_plus(s): """unquote('%7e/abc+def') -> '~/abc def'""" @@ -1186,9 +1192,12 @@ def unquote_plus(s): always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '0123456789' '_.-') -_safemaps = {} +_safe_map = {} +for i, c in zip(xrange(256), str(bytearray(xrange(256)))): + _safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i) +_safe_quoters = {} -def quote(s, safe = '/'): +def quote(s, safe='/'): """quote('abc def') -> 'abc%20def' Each part of a URL, e.g. the path info, the query, etc., has a @@ -1209,27 +1218,30 @@ def quote(s, safe = '/'): called on a path where the existing slash characters are used as reserved characters. """ + # fastpath + if not s: + return s cachekey = (safe, always_safe) try: - safe_map = _safemaps[cachekey] + (quoter, safe) = _safe_quoters[cachekey] except KeyError: - safe += always_safe - safe_map = {} - for i in range(256): - c = chr(i) - safe_map[c] = (c in safe) and c or ('%%%02X' % i) - _safemaps[cachekey] = safe_map - res = map(safe_map.__getitem__, s) - return ''.join(res) - -def quote_plus(s, safe = ''): + safe_map = _safe_map.copy() + safe_map.update([(c, c) for c in safe]) + quoter = safe_map.__getitem__ + safe = always_safe + safe + _safe_quoters[cachekey] = (quoter, safe) + if not s.rstrip(safe): + return s + return ''.join(map(quoter, s)) + +def quote_plus(s, safe=''): """Quote the query fragment of a URL; replacing ' ' with '+'""" if ' ' in s: s = quote(s, safe + ' ') return s.replace(' ', '+') return quote(s, safe) -def urlencode(query,doseq=0): +def urlencode(query, doseq=0): """Encode a sequence of two-element tuples or dictionary into a URL query string. If any values in the query arg are sequences and doseq is true, each @@ -1391,7 +1403,6 @@ if sys.platform == 'darwin': return False - def getproxies_macosx_sysconf(): """Return a dictionary of scheme -> proxy server URL mappings. @@ -1400,8 +1411,6 @@ if sys.platform == 'darwin': """ return _get_proxies() - - def proxy_bypass(host): if getproxies_environment(): return proxy_bypass_environment(host) diff --git a/Misc/NEWS b/Misc/NEWS index f278b26..65781f8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,8 @@ What's New in Python 2.6.6? Library ------- +- Issue #1285086: Speed up urllib.quote and urllib.unquote for simple cases. + - Issue #5798: Handle select.poll flag oddities properly on OS X. This fixes test_asynchat and test_smtplib failures on OS X. -- cgit v0.12