From 13d49ee7d6a44af656fd77713342e419ec57e4a5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 4 Dec 2010 17:24:33 +0000 Subject: Issue #10601: sys.displayhook uses 'backslashreplace' error handler on UnicodeEncodeError. --- Doc/library/sys.rst | 30 ++++++++++++++++-- Lib/test/test_cmd_line.py | 18 +++++++++++ Misc/NEWS | 3 ++ Python/sysmodule.c | 78 +++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 125 insertions(+), 4 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index c7aa214..95d1cf9 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -99,13 +99,39 @@ always available. .. function:: displayhook(value) - If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves - it in ``builtins._``. + If *value* is not ``None``, this function prints ``repr(value)`` to + ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is + not encodable to ``sys.stdout.encoding`` with ``sys.stdout.errors`` error + handler (which is probably ``'strict'``), encode it to + ``sys.stdout.encoding`` with ``'backslashreplace'`` error handler. ``sys.displayhook`` is called on the result of evaluating an :term:`expression` entered in an interactive Python session. The display of these values can be customized by assigning another one-argument function to ``sys.displayhook``. + Pseudo-code:: + + def displayhook(value): + if value is None: + return + # Set '_' to None to avoid recursion + builtins._ = None + text = repr(value) + try: + sys.stdout.write(text) + except UnicodeEncodeError: + bytes = text.encode(sys.stdout.encoding, 'backslashreplace') + if hasattr(sys.stdout, 'buffer'): + sys.stdout.buffer.write(bytes) + else: + text = bytes.decode(sys.stdout.encoding, 'strict') + sys.stdout.write(text) + sys.stdout.write("\n") + builtins._ = value + + .. versionchanged:: 3.2 + Use ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`. + .. 
def test_displayhook_unencodable(self):
    # Feed an interactive interpreter an expression whose repr() contains
    # characters that may not be encodable to the stdout encoding selected
    # through PYTHONIOENCODING, and check that the output contains the
    # repr() encoded with the 'backslashreplace' error handler.
    #
    # Sample content: non-ascii, surrogate, non-BMP printable,
    # non-BMP unprintable
    sample = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
    # The expression is sent as pure ASCII (via ascii()) so that writing it
    # to the child's stdin never depends on the encoding under test.
    statement = ascii(sample).encode('ascii') + b"\n"

    for codec in ('ascii', 'latin1', 'utf8'):
        child_env = os.environ.copy()
        child_env['PYTHONIOENCODING'] = codec
        interpreter = subprocess.Popen(
            [sys.executable, '-i'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=child_env)
        interpreter.stdin.write(statement)
        interpreter.stdin.write(b'exit()\n')
        # NOTE(review): kill_python() is defined elsewhere in this test
        # module; it is used here as returning the child's captured output.
        output = kill_python(interpreter)
        expected = repr(sample).encode(codec, 'backslashreplace')
        self.assertIn(expected, output)
*/ +static int +sys_displayhook_unencodable(PyObject *outf, PyObject *o) +{ + PyObject *stdout_encoding = NULL; + PyObject *encoded, *escaped_str, *repr_str, *buffer, *result; + char *stdout_encoding_str; + int ret; + + stdout_encoding = PyObject_GetAttrString(outf, "encoding"); + if (stdout_encoding == NULL) + goto error; + stdout_encoding_str = _PyUnicode_AsString(stdout_encoding); + if (stdout_encoding_str == NULL) + goto error; + + repr_str = PyObject_Repr(o); + if (repr_str == NULL) + goto error; + encoded = PyUnicode_AsEncodedString(repr_str, + stdout_encoding_str, + "backslashreplace"); + Py_DECREF(repr_str); + if (encoded == NULL) + goto error; + + buffer = PyObject_GetAttrString(outf, "buffer"); + if (buffer) { + result = PyObject_CallMethod(buffer, "write", "(O)", encoded); + Py_DECREF(buffer); + Py_DECREF(encoded); + if (result == NULL) + goto error; + Py_DECREF(result); + } + else { + PyErr_Clear(); + escaped_str = PyUnicode_FromEncodedObject(encoded, + stdout_encoding_str, + "strict"); + Py_DECREF(encoded); + if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) { + Py_DECREF(escaped_str); + goto error; + } + Py_DECREF(escaped_str); + } + ret = 0; + goto finally; + +error: + ret = -1; +finally: + Py_XDECREF(stdout_encoding); + return ret; +} + static PyObject * sys_displayhook(PyObject *self, PyObject *o) { @@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o) PyInterpreterState *interp = PyThreadState_GET()->interp; PyObject *modules = interp->modules; PyObject *builtins = PyDict_GetItemString(modules, "builtins"); + int err; if (builtins == NULL) { PyErr_SetString(PyExc_RuntimeError, "lost builtins module"); @@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o) PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); return NULL; } - if (PyFile_WriteObject(o, outf, 0) != 0) - return NULL; + if (PyFile_WriteObject(o, outf, 0) != 0) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) { + /* repr(o) is not encodable to 
sys.stdout.encoding with + * sys.stdout.errors error handler (which is probably 'strict') */ + PyErr_Clear(); + err = sys_displayhook_unencodable(outf, o); + if (err) + return NULL; + } + else { + return NULL; + } + } if (PyFile_WriteString("\n", outf) != 0) return NULL; if (PyObject_SetAttrString(builtins, "_", o) != 0) -- cgit v0.12