diff options
| author | Victor Stinner <victor.stinner@gmail.com> | 2014-03-17 21:39:49 (GMT) | 
|---|---|---|
| committer | Victor Stinner <victor.stinner@gmail.com> | 2014-03-17 21:39:49 (GMT) | 
| commit | c49926748b36d639c3c6889ffcb90ab069bba8ae (patch) | |
| tree | d7bcbe2140cdec4e696033a369f86c3366fe7713 | |
| parent | 82170288ef6e0e2378528c669a310fb67c900177 (diff) | |
| parent | d6a91a7ab69fe449259d8719acf63cca9af45ba0 (diff) | |
| download | cpython-c49926748b36d639c3c6889ffcb90ab069bba8ae.zip cpython-c49926748b36d639c3c6889ffcb90ab069bba8ae.tar.gz cpython-c49926748b36d639c3c6889ffcb90ab069bba8ae.tar.bz2  | |
(Merge 3.4) Issue #20879: Delay the initialization of encoding and decoding
tables for base32, ascii85 and base85 codecs in the base64 module, and delay
the initialization of the unquote_to_bytes() table of the urllib.parse module,
so that no memory is wasted when these modules are imported but the
corresponding functions are never called.
| -rwxr-xr-x | Lib/base64.py | 59 | ||||
| -rw-r--r-- | Lib/urllib/parse.py | 9 | ||||
| -rw-r--r-- | Misc/NEWS | 5 | 
3 files changed, 54 insertions, 19 deletions
diff --git a/Lib/base64.py b/Lib/base64.py index ad154ae..36c68a6 100755 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -138,15 +138,22 @@ def urlsafe_b64decode(s):  # Base32 encoding/decoding must be done in Python  _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' -_b32tab = [bytes([i]) for i in _b32alphabet] -_b32tab2 = [a + b for a in _b32tab for b in _b32tab] -_b32rev = {v: k for k, v in enumerate(_b32alphabet)} +_b32tab2 = None +_b32rev = None  def b32encode(s):      """Encode a byte string using Base32.      s is the byte string to encode.  The encoded byte string is returned.      """ +    global _b32tab2 +    # Delay the initialization of the table to not waste memory +    # if the function is never called +    if _b32tab2 is None: +        b32tab = [bytes((i,)) for i in _b32alphabet] +        _b32tab2 = [a + b for a in b32tab for b in b32tab] +        b32tab = None +      if not isinstance(s, bytes_types):          s = memoryview(s).tobytes()      leftover = len(s) % 5 @@ -193,6 +200,11 @@ def b32decode(s, casefold=False, map01=None):      the input is incorrectly padded or if there are non-alphabet      characters present in the input.      
""" +    global _b32rev +    # Delay the initialization of the table to not waste memory +    # if the function is never called +    if _b32rev is None: +        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}      s = _bytes_from_decode_data(s)      if len(s) % 8:          raise binascii.Error('Incorrect padding') @@ -274,6 +286,11 @@ def b16decode(s, casefold=False):  # Ascii85 encoding/decoding  # +_a85chars = None +_a85chars2 = None +_A85START = b"<~" +_A85END = b"~>" +  def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):      # Helper function for a85encode and b85encode      if not isinstance(b, bytes_types): @@ -284,8 +301,6 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):          b = b + b'\0' * padding      words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) -    a85chars2 = _a85chars2 -    a85chars = _a85chars      chunks = [b'z' if foldnuls and not word else                b'y' if foldspaces and word == 0x20202020 else                (chars2[word // 614125] + @@ -300,11 +315,6 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):      return b''.join(chunks) -_A85START = b"<~" -_A85END = b"~>" -_a85chars = [bytes([i]) for i in range(33, 118)] -_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] -  def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):      """Encode a byte string using Ascii85. @@ -324,6 +334,13 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):      adobe controls whether the encoded byte sequence is framed with <~ and ~>,      which is used by the Adobe implementation.      
""" +    global _a85chars, _a85chars2 +    # Delay the initialization of tables to not waste memory +    # if the function is never called +    if _a85chars is None: +        _a85chars = [bytes((i,)) for i in range(33, 118)] +        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] +      result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)      if adobe: @@ -408,10 +425,10 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):  # The following code is originally taken (with permission) from Mercurial -_b85chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ -            b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" -_b85chars = [bytes([i]) for i in _b85chars] -_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] +_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" +                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") +_b85chars = None +_b85chars2 = None  _b85dec = None  def b85encode(b, pad=False): @@ -420,17 +437,25 @@ def b85encode(b, pad=False):      If pad is true, the input is padded with "\0" so its length is a multiple of      4 characters before encoding.      
""" +    global _b85chars, _b85chars2 +    # Delay the initialization of tables to not waste memory +    # if the function is never called +    if _b85chars is None: +        _b85chars = [bytes((i,)) for i in _b85alphabet] +        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]      return _85encode(b, _b85chars, _b85chars2, pad)  def b85decode(b):      """Decode base85-encoded byte array""" -    b = _bytes_from_decode_data(b)      global _b85dec +    # Delay the initialization of tables to not waste memory +    # if the function is never called      if _b85dec is None:          _b85dec = [None] * 256 -        for i, c in enumerate(_b85chars): -            _b85dec[c[0]] = i +        for i, c in enumerate(_b85alphabet): +            _b85dec[c] = i +    b = _bytes_from_decode_data(b)      padding = (-len(b)) % 5      b = b + b'~' * padding      out = [] diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 2ba3991..a2a912d 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -472,8 +472,7 @@ def urldefrag(url):      return _coerce_result(DefragResult(defrag, frag))  _hexdig = '0123456789ABCDEFabcdef' -_hextobyte = {(a + b).encode(): bytes([int(a + b, 16)]) -              for a in _hexdig for b in _hexdig} +_hextobyte = None  def unquote_to_bytes(string):      """unquote_to_bytes('abc%20def') -> b'abc def'.""" @@ -490,6 +489,12 @@ def unquote_to_bytes(string):          return string      res = [bits[0]]      append = res.append +    # Delay the initialization of the table to not waste memory +    # if the function is never called +    global _hextobyte +    if _hextobyte is None: +        _hextobyte = {(a + b).encode(): bytes([int(a + b, 16)]) +                      for a in _hexdig for b in _hexdig}      for item in bits[1:]:          try:              append(_hextobyte[item[:2]]) @@ -13,6 +13,11 @@ Core and Builtins  Library  ------- +- Issue #20879: Delay the initialization of encoding and decoding tables for +  base32, ascii85 and 
base85 codecs in the base64 module, and delay the +  initialization of the unquote_to_bytes() table of the urllib.parse module, to +  not waste memory if these modules are not used. +  What's New in Python 3.4.0?  ===========================  | 
