diff options
-rw-r--r-- | Doc/c-api/buffer.rst | 12 | ||||
-rw-r--r-- | Doc/library/functions.rst | 75 | ||||
-rw-r--r-- | Doc/library/stdtypes.rst | 12 | ||||
-rw-r--r-- | Doc/reference/datamodel.rst | 28 | ||||
-rw-r--r-- | Lib/test/test_builtin.py | 1 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 20 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
7 files changed, 101 insertions, 49 deletions
diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst index d98ece3..740b575 100644 --- a/Doc/c-api/buffer.rst +++ b/Doc/c-api/buffer.rst @@ -1,5 +1,10 @@ .. highlightlang:: c +.. index:: + single: buffer protocol + single: buffer interface; (see buffer protocol) + single: buffer object; (see buffer protocol) + .. _bufferobjects: Buffer Protocol @@ -9,9 +14,6 @@ Buffer Protocol .. sectionauthor:: Benjamin Peterson -.. index:: - single: buffer interface - Certain objects available in Python wrap access to an underlying memory array or *buffer*. Such objects include the built-in :class:`bytes` and :class:`bytearray`, and some extension types like :class:`array.array`. @@ -23,8 +25,8 @@ characteristic of being backed by a possibly large memory buffer. It is then desireable, in some situations, to access that buffer directly and without intermediate copying. -Python provides such a facility at the C level in the form of the *buffer -protocol*. This protocol has two sides: +Python provides such a facility at the C level in the form of the :ref:`buffer +protocol <bufferobjects>`. This protocol has two sides: .. index:: single: PyBufferProcs diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index f9e0d0a..0d8f61c 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -528,7 +528,7 @@ are always available. They are listed here in alphabetical order. is used by most built-in types: :ref:`formatspec`. The default *format_spec* is an empty string which usually gives the same - effect as calling ``str(value)``. + effect as calling :func:`str(value) <str>`. A call to ``format(value, format_spec)`` is translated to ``type(value).__format__(format_spec)`` which bypasses the instance @@ -1246,38 +1246,51 @@ are always available. They are listed here in alphabetical order. For more information on static methods, consult the documentation on the standard type hierarchy in :ref:`types`. + .. 
index:: + single: string; str() (built-in function) + .. function:: str(object='') - str(object[, encoding[, errors]]) - - Return a string version of an object, using one of the following modes: - - If *encoding* and/or *errors* are given, :func:`str` will decode the - *object* which can either be a byte string or a character buffer using - the codec for *encoding*. The *encoding* parameter is a string giving - the name of an encoding; if the encoding is not known, :exc:`LookupError` - is raised. Error handling is done according to *errors*; this specifies the - treatment of characters which are invalid in the input encoding. If - *errors* is ``'strict'`` (the default), a :exc:`ValueError` is raised on - errors, while a value of ``'ignore'`` causes errors to be silently ignored, - and a value of ``'replace'`` causes the official Unicode replacement character, - U+FFFD, to be used to replace input characters which cannot be decoded. - See also the :mod:`codecs` module. - - When only *object* is given, this returns its nicely printable representation. - For strings, this is the string itself. The difference with ``repr(object)`` - is that ``str(object)`` does not always attempt to return a string that is - acceptable to :func:`eval`; its goal is to return a printable string. - With no arguments, this returns the empty string. - - Objects can specify what ``str(object)`` returns by defining a :meth:`__str__` - special method. - - For more information on strings see :ref:`typesseq` which describes sequence - functionality (strings are sequences), and also the string-specific methods - described in the :ref:`string-methods` section. To output formatted strings, - see the :ref:`string-formatting` section. In addition see the - :ref:`stringservices` section. + str(object=b'', encoding='utf-8', errors='strict') + + Return a :ref:`string <typesseq>` version of *object*. If *object* is not + provided, returns the empty string. 
Otherwise, the behavior of ``str()`` + depends on whether *encoding* or *errors* is given, as follows. + + If neither *encoding* nor *errors* is given, ``str(object)`` returns + :meth:`object.__str__() <object.__str__>`, which is the "informal" or nicely + printable string representation of *object*. For string objects, this is + the string itself. If *object* does not have a :meth:`~object.__str__` + method, then :func:`str` falls back to returning + :func:`repr(object) <repr>`. + + .. index:: + single: buffer protocol; str() (built-in function) + single: bytes; str() (built-in function) + + If at least one of *encoding* or *errors* is given, *object* should be a + :class:`bytes` or :class:`bytearray` object, or more generally any object + that supports the :ref:`buffer protocol <bufferobjects>`. In this case, if + *object* is a :class:`bytes` (or :class:`bytearray`) object, then + ``str(bytes, encoding, errors)`` is equivalent to + :meth:`bytes.decode(encoding, errors) <bytes.decode>`. Otherwise, the bytes + object underlying the buffer object is obtained before calling + :meth:`bytes.decode`. See the :ref:`typesseq` section, the + :ref:`typememoryview` section, and :ref:`bufferobjects` for information on + buffer objects. + + Passing a :class:`bytes` object to :func:`str` without the *encoding* + or *errors* arguments falls under the first case of returning the informal + string representation (see also the :option:`-b` command-line option to + Python). For example:: + + >>> str(b'Zoot!') + "b'Zoot!'" + + ``str`` is a built-in :term:`type`. For more information on the string + type and its methods, see the :ref:`typesseq` and :ref:`string-methods` + sections. To output formatted strings, see the :ref:`string-formatting` + section. In addition, see the :ref:`stringservices` section. .. 
function:: sum(iterable[, start]) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index f6eca56..4b224b3 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -794,6 +794,9 @@ More information about generators can be found in :ref:`the documentation for the yield expression <yieldexpr>`. +.. index:: + single: string; sequence types + .. _typesseq: Sequence Types --- :class:`str`, :class:`bytes`, :class:`bytearray`, :class:`list`, :class:`tuple`, :class:`range` @@ -804,17 +807,20 @@ byte arrays (:class:`bytearray` objects), lists, tuples, and range objects. For other containers see the built in :class:`dict` and :class:`set` classes, and the :mod:`collections` module. - .. index:: object: sequence - object: string object: bytes object: bytearray object: tuple object: list object: range + object: string + single: string + single: str() (built-in function); (see also string) -Strings contain Unicode characters. Their literals are written in single or +Textual data in Python is handled with :class:`str` objects, or :dfn:`strings`. +Strings are immutable :ref:`sequences <typesseq>` of Unicode code points. +String literals are written in single or double quotes: ``'xyzzy'``, ``"frobozz"``. See :ref:`strings` for more about string literals. In addition to the functionality described here, there are also string-specific methods described in the :ref:`string-methods` section. diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 322e8c8..8bbbc99 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1124,10 +1124,11 @@ Basic customization modules are still available at the time when the :meth:`__del__` method is called. + .. index:: + single: repr() (built-in function); __repr__() (object method) -.. method:: object.__repr__(self) - .. index:: builtin: repr +.. 
method:: object.__repr__(self) Called by the :func:`repr` built-in function to compute the "official" string representation of an object. If at all possible, this should look like a @@ -1141,18 +1142,25 @@ Basic customization This is typically used for debugging, so it is important that the representation is information-rich and unambiguous. + .. index:: + single: string; __str__() (object method) + single: format() (built-in function); __str__() (object method) + single: print() (built-in function); __str__() (object method) + .. method:: object.__str__(self) - .. index:: - builtin: str - builtin: print + Called by :func:`str(object) <str>` and the built-in functions + :func:`format` and :func:`print` to compute the "informal" or nicely + printable string representation of an object. The return value must be a + :ref:`string <typesseq>` object. - Called by the :func:`str` built-in function and by the :func:`print` function - to compute the "informal" string representation of an object. This differs - from :meth:`__repr__` in that it does not have to be a valid Python - expression: a more convenient or concise representation may be used instead. - The return value must be a string object. + This method differs from :meth:`object.__repr__` in that there is no + expectation that :meth:`__str__` return a valid Python expression: a more + convenient or concise representation can be used. + + The default implementation defined by the built-in type :class:`object` + calls :meth:`object.__repr__`. .. XXX what about subclasses of string? diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 55fb63a..dab56ce 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1184,6 +1184,7 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, setattr, sys, 1, 'spam') self.assertRaises(TypeError, setattr) + # test_str(): see test_unicode.py and test_bytes.py for str() tests. 
def test_sum(self): self.assertEqual(sum([]), 0) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 000ae6a..47af8b9 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1100,6 +1100,26 @@ class UnicodeTest(string_tests.CommonTest, self.assertRaises(TypeError, str, 42, 42, 42) + def test_constructor_keyword_args(self): + """Pass various keyword argument combinations to the constructor.""" + # The object argument can be passed as a keyword. + self.assertEqual(str(object='foo'), 'foo') + self.assertEqual(str(object=b'foo', encoding='utf-8'), 'foo') + # The errors argument without encoding triggers "decode" mode. + self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" + self.assertEqual(str(object=b'foo', errors='strict'), 'foo') + + def test_constructor_defaults(self): + """Check the constructor argument defaults.""" + # The object argument defaults to '' or b''. + self.assertEqual(str(), '') + self.assertEqual(str(errors='strict'), '') + utf8_cent = '¢'.encode('utf-8') + # The encoding argument defaults to utf-8. + self.assertEqual(str(utf8_cent, errors='strict'), '¢') + # The errors argument defaults to strict. + self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii') + def test_codecs_utf7(self): utfTests = [ ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example @@ -731,6 +731,8 @@ Tools/Demos Documentation ------------- +- Issue #13538: Improve str() and object.__str__() documentation. + - Issue #16400: Update the description of which versions of a given package PyPI displays. |