From f9e1f1128b1d040cabb519ab18f770aa0c456744 Mon Sep 17 00:00:00 2001 From: Petri Lehtinen Date: Sat, 1 Sep 2012 07:27:58 +0300 Subject: #13769: Enhance docs for ensure_ascii semantics in JSON decoding functions --- Doc/library/json.rst | 30 ++++++++++++++++++------------ Lib/json/__init__.py | 18 ++++++++++-------- Lib/json/encoder.py | 9 ++++++--- Misc/NEWS | 3 +++ 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 69ebc4f..ed5cf21 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -127,11 +127,14 @@ Basic Usage :class:`float`, :class:`bool`, ``None``) will be skipped instead of raising a :exc:`TypeError`. - If *ensure_ascii* is ``False`` (default: ``True``), then some chunks written - to *fp* may be :class:`unicode` instances, subject to normal Python - :class:`str` to :class:`unicode` coercion rules. Unless ``fp.write()`` - explicitly understands :class:`unicode` (as in :func:`codecs.getwriter`) this - is likely to cause an error. + If *ensure_ascii* is ``True`` (the default), all non-ASCII characters in the + output are escaped with ``\uXXXX`` sequences, and the result is a + :class:`str` instance consisting of ASCII characters only. If + *ensure_ascii* is ``False``, some chunks written to *fp* may be + :class:`unicode` instances. This usually happens because the input contains + unicode strings or the *encoding* parameter is used. Unless ``fp.write()`` + explicitly understands :class:`unicode` (as in :func:`codecs.getwriter`) + this is likely to cause an error. If *check_circular* is ``False`` (default: ``True``), then the circular reference check for container types will be skipped and a circular reference @@ -168,11 +171,11 @@ Basic Usage .. function:: dumps(obj[, skipkeys[, ensure_ascii[, check_circular[, allow_nan[, cls[, indent[, separators[, encoding[, default[, **kw]]]]]]]]]]) - Serialize *obj* to a JSON formatted :class:`str`. 
+ Serialize *obj* to a JSON formatted :class:`str`. If *ensure_ascii* is + ``False``, the result may contain non-ASCII characters and the return value + may be a :class:`unicode` instance. - If *ensure_ascii* is ``False``, then the return value will be a - :class:`unicode` instance. The other arguments have the same meaning as in - :func:`dump`. + The arguments have the same meaning as in :func:`dump`. .. note:: @@ -371,9 +374,12 @@ Encoders and Decoders attempt encoding of keys that are not str, int, long, float or None. If *skipkeys* is ``True``, such items are simply skipped. - If *ensure_ascii* is ``True`` (the default), the output is guaranteed to be - :class:`str` objects with all incoming unicode characters escaped. If - *ensure_ascii* is ``False``, the output will be a unicode object. + If *ensure_ascii* is ``True`` (the default), all non-ASCII characters in the + output are escaped with ``\uXXXX`` sequences, and the results are + :class:`str` instances consisting of ASCII characters only. If + *ensure_ascii* is ``False``, a result may be a :class:`unicode` + instance. This usually happens if the input contains unicode strings or the + *encoding* parameter is used. If *check_circular* is ``True`` (the default), then lists, dicts, and custom encoded objects will be checked for circular references during encoding to diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index d3b8b0b..4f3f6c5 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -129,11 +129,14 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. 
Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. + If ``ensure_ascii`` is true (the default), all non-ASCII characters in the + output are escaped with ``\uXXXX`` sequences, and the result is a ``str`` + instance consisting of ASCII characters only. If ``ensure_ascii`` is + false, some chunks written to ``fp`` may be ``unicode`` instances. + This usually happens because the input contains unicode strings or the + ``encoding`` parameter is used. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter``) this is likely to + cause an error. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -191,9 +194,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. + If ``ensure_ascii`` is false, non-ASCII characters are not escaped, and + the return value may be a ``unicode`` instance. See ``dump`` for details. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index b0d745b..169450d 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -107,9 +107,12 @@ class JSONEncoder(object): encoding of keys that are not str, int, long, float or None. If skipkeys is True, such items are simply skipped. - If ensure_ascii is true, the output is guaranteed to be str - objects with all incoming unicode characters escaped. If - ensure_ascii is false, the output will be unicode object. 
+ If ensure_ascii is true (the default), all non-ASCII + characters in the output are escaped with \uXXXX sequences, + and the results are str instances consisting of ASCII + characters only. If ensure_ascii is false, a result may be a + unicode instance. This usually happens if the input contains + unicode strings or the encoding parameter is used. If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to diff --git a/Misc/NEWS b/Misc/NEWS index bdfd0b7..13e8b00 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -393,6 +393,9 @@ Build Documentation ------------- +- Issue #13769: Document the effect of ensure_ascii on the return type + of JSON encoding functions. + - Issue #14880: Fix kwargs notation in csv.reader, .writer & .register_dialect. Patch by Chris Rebert. -- cgit v0.12