diff options
author | Chris Jerdonek <chris.jerdonek@gmail.com> | 2012-11-21 01:45:51 (GMT) |
---|---|---|
committer | Chris Jerdonek <chris.jerdonek@gmail.com> | 2012-11-21 01:45:51 (GMT) |
commit | 5fae0e58549c9270b188a1591ffa6cc4c2d9ab4e (patch) | |
tree | 5e955cf6e54ee57cf3531687ddf4c01681c34786 | |
parent | 9ddfb19e4197be4abc0b74236f207a3efc853c04 (diff) | |
download | cpython-5fae0e58549c9270b188a1591ffa6cc4c2d9ab4e.zip cpython-5fae0e58549c9270b188a1591ffa6cc4c2d9ab4e.tar.gz cpython-5fae0e58549c9270b188a1591ffa6cc4c2d9ab4e.tar.bz2 |
Improve str() and object.__str__() documentation (issue #13538).
-rw-r--r-- | Doc/c-api/buffer.rst | 12 | ||||
-rw-r--r-- | Doc/library/functions.rst | 71 | ||||
-rw-r--r-- | Doc/library/stdtypes.rst | 25 | ||||
-rw-r--r-- | Doc/reference/datamodel.rst | 28 | ||||
-rw-r--r-- | Lib/test/test_builtin.py | 1 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 20 | ||||
-rw-r--r-- | Misc/NEWS | 2 |
7 files changed, 105 insertions, 54 deletions
diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst index d636935..0b521df 100644 --- a/Doc/c-api/buffer.rst +++ b/Doc/c-api/buffer.rst @@ -1,5 +1,10 @@ .. highlightlang:: c +.. index:: + single: buffer protocol + single: buffer interface; (see buffer protocol) + single: buffer object; (see buffer protocol) + .. _bufferobjects: Buffer Protocol @@ -10,9 +15,6 @@ Buffer Protocol .. sectionauthor:: Stefan Krah -.. index:: - single: buffer interface - Certain objects available in Python wrap access to an underlying memory array or *buffer*. Such objects include the built-in :class:`bytes` and :class:`bytearray`, and some extension types like :class:`array.array`. @@ -24,8 +26,8 @@ characteristic of being backed by a possibly large memory buffer. It is then desirable, in some situations, to access that buffer directly and without intermediate copying. -Python provides such a facility at the C level in the form of the *buffer -protocol*. This protocol has two sides: +Python provides such a facility at the C level in the form of the :ref:`buffer +protocol <bufferobjects>`. This protocol has two sides: .. index:: single: PyBufferProcs diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 024ef0f..f027bac 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -534,7 +534,7 @@ are always available. They are listed here in alphabetical order. is used by most built-in types: :ref:`formatspec`. The default *format_spec* is an empty string which usually gives the same - effect as calling ``str(value)``. + effect as calling :func:`str(value) <str>`. A call to ``format(value, format_spec)`` is translated to ``type(value).__format__(format_spec)`` which bypasses the instance @@ -1249,37 +1249,50 @@ are always available. They are listed here in alphabetical order. For more information on static methods, consult the documentation on the standard type hierarchy in :ref:`types`. + .. index:: + single: string; str() (built-in function) + .. 
_func-str: .. function:: str(object='') - str(object[, encoding[, errors]]) - - Return a :ref:`string <textseq>` version of an object, using one of the - following modes: - - If *encoding* and/or *errors* are given, :func:`str` will decode the - *object* which can either be a byte string or a character buffer using - the codec for *encoding*. The *encoding* parameter is a string giving - the name of an encoding; if the encoding is not known, :exc:`LookupError` - is raised. Error handling is done according to *errors*; this specifies the - treatment of characters which are invalid in the input encoding. If - *errors* is ``'strict'`` (the default), a :exc:`ValueError` is raised on - errors, while a value of ``'ignore'`` causes errors to be silently ignored, - and a value of ``'replace'`` causes the official Unicode replacement character, - U+FFFD, to be used to replace input characters which cannot be decoded. - See also the :mod:`codecs` module. - - When only *object* is given, this returns its nicely printable representation. - For strings, this is the string itself. The difference with ``repr(object)`` - is that ``str(object)`` does not always attempt to return a string that is - acceptable to :func:`eval`; its goal is to return a printable string. - With no arguments, this returns the empty string. - - Objects can specify what ``str(object)`` returns by defining a :meth:`__str__` - special method. - - For more information on strings and string methods, see the :ref:`textseq` - section. To output formatted strings, see the :ref:`string-formatting` + str(object=b'', encoding='utf-8', errors='strict') + + Return a :ref:`string <textseq>` version of *object*. If *object* is not + provided, returns the empty string. Otherwise, the behavior of ``str()`` + depends on whether *encoding* or *errors* is given, as follows. 
+ + If neither *encoding* nor *errors* is given, ``str(object)`` returns + :meth:`object.__str__() <object.__str__>`, which is the "informal" or nicely + printable string representation of *object*. For string objects, this is + the string itself. If *object* does not have a :meth:`~object.__str__` + method, then :func:`str` falls back to returning + :func:`repr(object) <repr>`. + + .. index:: + single: buffer protocol; str() (built-in function) + single: bytes; str() (built-in function) + + If at least one of *encoding* or *errors* is given, *object* should be a + :class:`bytes` or :class:`bytearray` object, or more generally any object + that supports the :ref:`buffer protocol <bufferobjects>`. In this case, if + *object* is a :class:`bytes` (or :class:`bytearray`) object, then + ``str(bytes, encoding, errors)`` is equivalent to + :meth:`bytes.decode(encoding, errors) <bytes.decode>`. Otherwise, the bytes + object underlying the buffer object is obtained before calling + :meth:`bytes.decode`. See :ref:`binaryseq` and + :ref:`bufferobjects` for information on buffer objects. + + Passing a :class:`bytes` object to :func:`str` without the *encoding* + or *errors* arguments falls under the first case of returning the informal + string representation (see also the :option:`-b` command-line option to + Python). For example:: + + >>> str(b'Zoot!') + "b'Zoot!'" + + ``str`` is a built-in :term:`type`. For more information on the string + type and its methods, see the :ref:`textseq` and :ref:`string-methods` + sections. To output formatted strings, see the :ref:`string-formatting` section. In addition, see the :ref:`stringservices` section. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 27be4f5..bf5d756 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1346,19 +1346,18 @@ range(2, 1, 3)`` or ``range(0, 3, 2) == range(0, 4, 2)``.) The :attr:`start`, :attr:`stop` and :attr:`step` attributes. +.. 
index:: + single: string; text sequence type + single: str() (built-in function); (see also string) + object: string + .. _textseq: Text Sequence Type --- :class:`str` =================================== -.. index:: - object: string - object: bytes - object: bytearray - object: io.StringIO - - -Textual data in Python is handled with ``str`` objects, which are immutable +Textual data in Python is handled with :class:`str` objects, or :dfn:`strings`. +Strings are immutable :ref:`sequences <typesseq>` of Unicode code points. String literals are written in a variety of ways: @@ -1383,6 +1382,9 @@ function :func:`str`. Since there is no separate "character" type, indexing a string produces strings of length 1. That is, for a non-empty string *s*, ``s[0] == s[0:1]``. +.. index:: + object: io.StringIO + There is also no mutable string type, but :meth:`str.join` or :class:`io.StringIO` can be used to efficiently construct strings from multiple fragments. @@ -2064,6 +2066,9 @@ that ``'\0'`` is the end of the string. longer replaced by ``%g`` conversions. +.. index:: + single: buffer protocol; binary sequence types + .. _binaryseq: Binary Sequence Types --- :class:`bytes`, :class:`bytearray`, :class:`memoryview` @@ -2077,8 +2082,8 @@ Binary Sequence Types --- :class:`bytes`, :class:`bytearray`, :class:`memoryview The core built-in types for manipulating binary data are :class:`bytes` and :class:`bytearray`. They are supported by :class:`memoryview` which uses -the buffer protocol to access the memory of other binary objects without -needing to make a copy. +the :ref:`buffer protocol <bufferobjects>` to access the memory of other +binary objects without needing to make a copy. The :mod:`array` module supports efficient storage of basic data types like 32-bit integers and IEEE754 double-precision floating values. 
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index d093383..b7a6736 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1140,10 +1140,11 @@ Basic customization modules are still available at the time when the :meth:`__del__` method is called. + .. index:: + single: repr() (built-in function); __repr__() (object method) -.. method:: object.__repr__(self) - .. index:: builtin: repr +.. method:: object.__repr__(self) Called by the :func:`repr` built-in function to compute the "official" string representation of an object. If at all possible, this should look like a @@ -1157,18 +1158,25 @@ Basic customization This is typically used for debugging, so it is important that the representation is information-rich and unambiguous. + .. index:: + single: string; __str__() (object method) + single: format() (built-in function); __str__() (object method) + single: print() (built-in function); __str__() (object method) + .. method:: object.__str__(self) - .. index:: - builtin: str - builtin: print + Called by :func:`str(object) <str>` and the built-in functions + :func:`format` and :func:`print` to compute the "informal" or nicely + printable string representation of an object. The return value must be a + :ref:`string <textseq>` object. - Called by the :func:`str` built-in function and by the :func:`print` function - to compute the "informal" string representation of an object. This differs - from :meth:`__repr__` in that it does not have to be a valid Python - expression: a more convenient or concise representation may be used instead. - The return value must be a string object. + This method differs from :meth:`object.__repr__` in that there is no + expectation that :meth:`__str__` return a valid Python expression: a more + convenient or concise representation can be used. + + The default implementation defined by the built-in type :class:`object` + calls :meth:`object.__repr__`. .. XXX what about subclasses of string? 
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index c32992c..19d7c70 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1286,6 +1286,7 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, setattr, sys, 1, 'spam') self.assertRaises(TypeError, setattr) + # test_str(): see test_unicode.py and test_bytes.py for str() tests. def test_sum(self): self.assertEqual(sum([]), 0) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index a811c4c..9aaedd3 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1182,6 +1182,26 @@ class UnicodeTest(string_tests.CommonTest, self.assertRaises(TypeError, str, 42, 42, 42) + def test_constructor_keyword_args(self): + """Pass various keyword argument combinations to the constructor.""" + # The object argument can be passed as a keyword. + self.assertEqual(str(object='foo'), 'foo') + self.assertEqual(str(object=b'foo', encoding='utf-8'), 'foo') + # The errors argument without encoding triggers "decode" mode. + self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" + self.assertEqual(str(object=b'foo', errors='strict'), 'foo') + + def test_constructor_defaults(self): + """Check the constructor argument defaults.""" + # The object argument defaults to '' or b''. + self.assertEqual(str(), '') + self.assertEqual(str(errors='strict'), '') + utf8_cent = '¢'.encode('utf-8') + # The encoding argument defaults to utf-8. + self.assertEqual(str(utf8_cent, errors='strict'), '¢') + # The errors argument defaults to strict. + self.assertRaises(UnicodeDecodeError, str, utf8_cent, encoding='ascii') + def test_codecs_utf7(self): utfTests = [ ('A\u2262\u0391.', b'A+ImIDkQ.'), # RFC2152 example @@ -283,6 +283,8 @@ Tools/Demos Documentation ------------- +- Issue #13538: Improve str() and object.__str__() documentation. + - Issue #16489: Make it clearer that importlib.find_loader() requires any and all packages to be separately imported. |