From 9c1aed8f94a2b7a40c3a4db60cb289c90e001896 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 23 Nov 2013 11:13:36 +1000 Subject: Close #7475: Restore binary & text transform codecs The codecs themselves were restored in Python 3.2, this completes the restoration by adding back the convenience aliases. These aliases were originally left out due to confusing errors when attempting to use them with the text encoding specific convenience methods. Python 3.4 includes several improvements to those errors, thus permitting the aliases to be restored as well. --- Doc/library/codecs.rst | 116 ++++++++++++++++++++++++++++------------------- Doc/whatsnew/3.4.rst | 50 +++++++++++++------- Lib/encodings/aliases.py | 36 +++++++-------- Lib/test/test_codecs.py | 20 ++++++++ 4 files changed, 142 insertions(+), 80 deletions(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 358fde7..ef79918 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1188,6 +1188,9 @@ common use case for codecs, the underlying codec infrastructure supports arbitrary data transforms rather than just text encodings). For asymmetric codecs, the stated purpose describes the encoding direction. +Text Encodings +^^^^^^^^^^^^^^ + The following codecs provide :class:`str` to :class:`bytes` encoding and :term:`bytes-like object` to :class:`str` decoding, similar to the Unicode text encodings. @@ -1234,62 +1237,83 @@ encodings. | | | .. deprecated:: 3.3 | +--------------------+---------+---------------------------+ -The following codecs provide :term:`bytes-like object` to :class:`bytes` -mappings. - - -.. 
tabularcolumns:: |l|L|L| - -+----------------------+------------------------------+------------------------------+ -| Codec | Purpose | Encoder / decoder | -+======================+==============================+==============================+ -| base64_codec [#b64]_ | Convert operand to MIME | :meth:`base64.b64encode` / | -| | base64 (the result always | :meth:`base64.b64decode` | -| | includes a trailing | | -| | ``'\n'``) | | -| | | | -| | .. versionchanged:: 3.4 | | -| | accepts any | | -| | :term:`bytes-like object` | | -| | as input for encoding and | | -| | decoding | | -+----------------------+------------------------------+------------------------------+ -| bz2_codec | Compress the operand | :meth:`bz2.compress` / | -| | using bz2 | :meth:`bz2.decompress` | -+----------------------+------------------------------+------------------------------+ -| hex_codec | Convert operand to | :meth:`base64.b16encode` / | -| | hexadecimal | :meth:`base64.b16decode` | -| | representation, with two | | -| | digits per byte | | -+----------------------+------------------------------+------------------------------+ -| quopri_codec | Convert operand to MIME | :meth:`quopri.encodestring` /| -| | quoted printable | :meth:`quopri.decodestring` | -+----------------------+------------------------------+------------------------------+ -| uu_codec | Convert the operand using | :meth:`uu.encode` / | -| | uuencode | :meth:`uu.decode` | -+----------------------+------------------------------+------------------------------+ -| zlib_codec | Compress the operand | :meth:`zlib.compress` / | -| | using gzip | :meth:`zlib.decompress` | -+----------------------+------------------------------+------------------------------+ +.. _binary-transforms: + +Binary Transforms +^^^^^^^^^^^^^^^^^ + +The following codecs provide binary transforms: :term:`bytes-like object` +to :class:`bytes` mappings. + + +.. 
tabularcolumns:: |l|L|L|L| + ++----------------------+------------------+------------------------------+------------------------------+ +| Codec | Aliases | Purpose | Encoder / decoder | ++======================+==================+==============================+==============================+ +| base64_codec [#b64]_ | base64, base_64 | Convert operand to MIME | :meth:`base64.b64encode` / | +| | | base64 (the result always | :meth:`base64.b64decode` | +| | | includes a trailing | | +| | | ``'\n'``) | | +| | | | | +| | | .. versionchanged:: 3.4 | | +| | | accepts any | | +| | | :term:`bytes-like object` | | +| | | as input for encoding and | | +| | | decoding | | ++----------------------+------------------+------------------------------+------------------------------+ +| bz2_codec | bz2 | Compress the operand | :meth:`bz2.compress` / | +| | | using bz2 | :meth:`bz2.decompress` | ++----------------------+------------------+------------------------------+------------------------------+ +| hex_codec | hex | Convert operand to | :meth:`base64.b16encode` / | +| | | hexadecimal | :meth:`base64.b16decode` | +| | | representation, with two | | +| | | digits per byte | | ++----------------------+------------------+------------------------------+------------------------------+ +| quopri_codec | quopri, | Convert operand to MIME | :meth:`quopri.encodestring` /| +| | quotedprintable, | quoted printable | :meth:`quopri.decodestring` | +| | quoted_printable | | | ++----------------------+------------------+------------------------------+------------------------------+ +| uu_codec | uu | Convert the operand using | :meth:`uu.encode` / | +| | | uuencode | :meth:`uu.decode` | ++----------------------+------------------+------------------------------+------------------------------+ +| zlib_codec | zip, zlib | Compress the operand | :meth:`zlib.compress` / | +| | | using gzip | :meth:`zlib.decompress` | 
++----------------------+------------------+------------------------------+------------------------------+ .. [#b64] In addition to :term:`bytes-like objects `, ``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for decoding +.. versionadded:: 3.2 + Restoration of the binary transforms. -The following codecs provide :class:`str` to :class:`str` mappings. +.. versionchanged:: 3.4 + Restoration of the aliases for the binary transforms. -.. tabularcolumns:: |l|L| -+--------------------+---------------------------+ -| Codec | Purpose | -+====================+===========================+ -| rot_13 | Returns the Caesar-cypher | -| | encryption of the operand | -+--------------------+---------------------------+ +.. _text-transforms: + +Text Transforms +^^^^^^^^^^^^^^^ + +The following codec provides a text transform: a :class:`str` to :class:`str` +mapping. + +.. tabularcolumns:: |l|l|L| + ++--------------------+---------+---------------------------+ +| Codec | Aliases | Purpose | ++====================+=========+===========================+ +| rot_13 | rot13 | Returns the Caesar-cypher | +| | | encryption of the operand | ++--------------------+---------+---------------------------+ .. versionadded:: 3.2 - bytes-to-bytes and str-to-str codecs. + Restoration of the ``rot_13`` text transform. + +.. versionchanged:: 3.4 + Restoration of the ``rot13`` alias. :mod:`encodings.idna` --- Internationalized Domain Names in Applications diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 09d8be8..6fc0e48 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -103,7 +103,8 @@ New expected features for Python implementations: * :ref:`PEP 446: Make newly created file descriptors non-inheritable `. * command line option for :ref:`isolated mode `, (:issue:`16499`). 
-* improvements to handling of non-Unicode codecs +* :ref:`improvements <codec-handling-improvements>` in the handling of + codecs that are not text encodings Significantly Improved Library Modules: @@ -173,8 +174,10 @@ PEP 446: Make newly created file descriptors non-inheritable PEP written and implemented by Victor Stinner. -Improvements to handling of non-Unicode codecs -============================================== +.. _codec-handling-improvements: + +Improvements to codec handling +============================== Since it was first introduced, the :mod:`codecs` module has always been intended to operate as a type-neutral dynamic encoding and decoding @@ -186,7 +189,7 @@ fact. As a key step in clarifying the situation, the :meth:`codecs.encode` and :meth:`codecs.decode` convenience functions are now properly documented in Python 2.7, 3.3 and 3.4. These functions have existed in the :mod:`codecs` -module and have been covered by the regression test suite since Python 2.4, +module (and have been covered by the regression test suite) since Python 2.4, but were previously only discoverable through runtime introspection.
Unlike the convenience methods on :class:`str`, :class:`bytes` and @@ -199,43 +202,58 @@ In Python 3.4, the interpreter is able to identify the known non-text encodings provided in the standard library and direct users towards these general purpose convenience functions when appropriate:: - >>> import codecs - - >>> b"abcdef".decode("hex_codec") + >>> b"abcdef".decode("hex") Traceback (most recent call last): File "<stdin>", line 1, in <module> - LookupError: 'hex_codec' is not a text encoding; use codecs.decode() to handle arbitrary codecs + LookupError: 'hex' is not a text encoding; use codecs.decode() to handle arbitrary codecs - >>> "hello".encode("rot_13") + >>> "hello".encode("rot13") Traceback (most recent call last): File "<stdin>", line 1, in <module> - LookupError: 'rot_13' is not a text encoding; use codecs.encode() to handle arbitrary codecs + LookupError: 'rot13' is not a text encoding; use codecs.encode() to handle arbitrary codecs In a related change, whenever it is feasible without breaking backwards compatibility, exceptions raised during encoding and decoding operations will be wrapped in a chained exception of the same type that mentions the name of the codec responsible for producing the error:: - >>> codecs.decode(b"abcdefgh", "hex_codec") + >>> import codecs + + >>> codecs.decode(b"abcdefgh", "hex") binascii.Error: Non-hexadecimal digit found The above exception was the direct cause of the following exception: Traceback (most recent call last): File "<stdin>", line 1, in <module> - binascii.Error: decoding with 'hex_codec' codec failed (Error: Non-hexadecimal digit found) + binascii.Error: decoding with 'hex' codec failed (Error: Non-hexadecimal digit found) - >>> codecs.encode("hello", "bz2_codec") + >>> codecs.encode("hello", "bz2") TypeError: 'str' does not support the buffer interface The above exception was the direct cause of the following exception: Traceback (most recent call last): File "<stdin>", line 1, in <module> - TypeError: encoding with 'bz2_codec' codec failed (TypeError: 'str' does not
support the buffer interface) + TypeError: encoding with 'bz2' codec failed (TypeError: 'str' does not support the buffer interface) + +Finally, as the examples above show, these improvements have permitted +the restoration of the convenience aliases for the non-Unicode codecs that +were themselves restored in Python 3.2. This means that encoding binary data +to and from its hexadecimal representation (for example) can now be written +as:: + + >>> from codecs import encode, decode + >>> encode(b"hello", "hex") + b'68656c6c6f' + >>> decode(b"68656c6c6f", "hex") + b'hello' + +The binary and text transforms provided in the standard library are detailed +in :ref:`binary-transforms` and :ref:`text-transforms`. -(Contributed by Nick Coghlan in :issue:`17827`, :issue:`17828` and -:issue:`19619`) +(Contributed by Nick Coghlan in :issue:`7475`, :issue:`17827`, +:issue:`17828` and :issue:`19619`) .. _pep-451: diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index 235deb5..331095b 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -33,9 +33,9 @@ aliases = { 'us' : 'ascii', 'us_ascii' : 'ascii', - ## base64_codec codec - #'base64' : 'base64_codec', - #'base_64' : 'base64_codec', + # base64_codec codec + 'base64' : 'base64_codec', + 'base_64' : 'base64_codec', # big5 codec 'big5_tw' : 'big5', @@ -45,8 +45,8 @@ aliases = { 'big5_hkscs' : 'big5hkscs', 'hkscs' : 'big5hkscs', - ## bz2_codec codec - #'bz2' : 'bz2_codec', + # bz2_codec codec + 'bz2' : 'bz2_codec', # cp037 codec '037' : 'cp037', @@ -248,8 +248,8 @@ aliases = { 'cp936' : 'gbk', 'ms936' : 'gbk', - ## hex_codec codec - #'hex' : 'hex_codec', + # hex_codec codec + 'hex' : 'hex_codec', # hp_roman8 codec 'roman8' : 'hp_roman8', @@ -450,13 +450,13 @@ aliases = { 'cp154' : 'ptcp154', 'cyrillic_asian' : 'ptcp154', - ## quopri_codec codec - #'quopri' : 'quopri_codec', - #'quoted_printable' : 'quopri_codec', - #'quotedprintable' : 'quopri_codec', + # quopri_codec codec + 'quopri' :
'quopri_codec', + 'quoted_printable' : 'quopri_codec', + 'quotedprintable' : 'quopri_codec', - ## rot_13 codec - #'rot13' : 'rot_13', + # rot_13 codec + 'rot13' : 'rot_13', # shift_jis codec 'csshiftjis' : 'shift_jis', @@ -518,12 +518,12 @@ aliases = { 'utf8_ucs2' : 'utf_8', 'utf8_ucs4' : 'utf_8', - ## uu_codec codec - #'uu' : 'uu_codec', + # uu_codec codec + 'uu' : 'uu_codec', - ## zlib_codec codec - #'zip' : 'zlib_codec', - #'zlib' : 'zlib_codec', + # zlib_codec codec + 'zip' : 'zlib_codec', + 'zlib' : 'zlib_codec', # temporary mac CJK aliases, will be replaced by proper codecs in 3.1 'x_mac_japanese' : 'shift_jis', diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 506ba7d..07a6a5e 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2320,18 +2320,29 @@ bytes_transform_encodings = [ "quopri_codec", "hex_codec", ] + +transform_aliases = { + "base64_codec": ["base64", "base_64"], + "uu_codec": ["uu"], + "quopri_codec": ["quopri", "quoted_printable", "quotedprintable"], + "hex_codec": ["hex"], + "rot_13": ["rot13"], +} + try: import zlib except ImportError: pass else: bytes_transform_encodings.append("zlib_codec") + transform_aliases["zlib_codec"] = ["zip", "zlib"] try: import bz2 except ImportError: pass else: bytes_transform_encodings.append("bz2_codec") + transform_aliases["bz2_codec"] = ["bz2"] class TransformCodecTest(unittest.TestCase): @@ -2445,6 +2456,15 @@ class TransformCodecTest(unittest.TestCase): # Unfortunately, the bz2 module throws OSError, which the codec # machinery currently can't wrap :( + # Ensure codec aliases from http://bugs.python.org/issue7475 work + def test_aliases(self): + for codec_name, aliases in transform_aliases.items(): + expected_name = codecs.lookup(codec_name).name + for alias in aliases: + with self.subTest(alias=alias): + info = codecs.lookup(alias) + self.assertEqual(info.name, expected_name) + # The codec system tries to wrap exceptions in order to ensure the error # mentions the 
operation being performed and the codec involved. We -- cgit v0.12