summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2010-12-04 17:24:33 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2010-12-04 17:24:33 (GMT)
commit13d49ee7d6a44af656fd77713342e419ec57e4a5 (patch)
tree30250bac5be4f5e904d62f3628424e0ef6e22c2c
parent44588b45d2b9cedb9fd91f82c1b00cd781a56c94 (diff)
downloadcpython-13d49ee7d6a44af656fd77713342e419ec57e4a5.zip
cpython-13d49ee7d6a44af656fd77713342e419ec57e4a5.tar.gz
cpython-13d49ee7d6a44af656fd77713342e419ec57e4a5.tar.bz2
Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.
-rw-r--r--Doc/library/sys.rst30
-rw-r--r--Lib/test/test_cmd_line.py18
-rw-r--r--Misc/NEWS3
-rw-r--r--Python/sysmodule.c78
4 files changed, 125 insertions, 4 deletions
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index c7aa214..95d1cf9 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -99,13 +99,39 @@ always available.
.. function:: displayhook(value)
- If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
- it in ``builtins._``.
+ If *value* is not ``None``, this function prints ``repr(value)`` to
+ ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is
+ not encodable to ``sys.stdout.encoding`` with ``sys.stdout.errors`` error
+ handler (which is probably ``'strict'``), encode it to
+ ``sys.stdout.encoding`` with ``'backslashreplace'`` error handler.
``sys.displayhook`` is called on the result of evaluating an :term:`expression`
entered in an interactive Python session. The display of these values can be
customized by assigning another one-argument function to ``sys.displayhook``.
+ Pseudo-code::
+
+       def displayhook(value):
+           if value is None:
+               return
+           # Set '_' to None to avoid recursion
+           builtins._ = None
+           text = repr(value)
+           try:
+               sys.stdout.write(text)
+           except UnicodeEncodeError:
+               bytes = text.encode(sys.stdout.encoding, 'backslashreplace')
+               if hasattr(sys.stdout, 'buffer'):
+                   sys.stdout.buffer.write(bytes)
+               else:
+                   text = bytes.decode(sys.stdout.encoding, 'strict')
+                   sys.stdout.write(text)
+           sys.stdout.write("\n")
+           builtins._ = value
+
+ .. versionchanged:: 3.2
+ Use ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
+
.. function:: excepthook(type, value, traceback)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index e1fe7f5..b21b61e 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -221,6 +221,24 @@ class CmdLineTest(unittest.TestCase):
self.assertIn(path1.encode('ascii'), out)
self.assertIn(path2.encode('ascii'), out)
+    def test_displayhook_unencodable(self):
+        # Check that the interactive interpreter can display a string whose
+        # repr() is not encodable to stdout: the expected output is the repr
+        # encoded with the 'backslashreplace' error handler.
+        # Sample: non-ascii, surrogate, non-BMP printable, non-BMP unprintable
+        sample = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
+        # ascii() yields a pure ASCII expression, so the same bytes are sent
+        # to the child whatever encoding is being tested.
+        statement = ascii(sample).encode('ascii') + b"\n"
+        for encoding in ('ascii', 'latin1', 'utf8'):
+            child_env = dict(os.environ, PYTHONIOENCODING=encoding)
+            interpreter = subprocess.Popen(
+                [sys.executable, '-i'],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                env=child_env)
+            interpreter.stdin.write(statement)
+            interpreter.stdin.write(b'exit()\n')
+            output = kill_python(interpreter)
+            expected = repr(sample).encode(encoding, 'backslashreplace')
+            self.assertIn(expected, output)
+
def test_main():
test.support.run_unittest(CmdLineTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index 0d1c1d5..124fc1c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Core and Builtins
Library
-------
+- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
+ UnicodeEncodeError.
+
- Add the "display" and "undisplay" pdb commands.
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 204c8c8..0a14f0e 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -65,6 +65,68 @@ PySys_SetObject(const char *name, PyObject *v)
return PyDict_SetItemString(sd, name, v);
}
+/* Write repr(o) to sys.stdout using sys.stdout.encoding and the
+   'backslashreplace' error handler.
+
+   outf is the sys.stdout object and o is the value to display.  If outf has
+   a "buffer" attribute, the encoded bytes are passed to outf.buffer.write();
+   otherwise the bytes are decoded again with the same encoding and the
+   resulting (escaped) string is written to outf with PyFile_WriteObject().
+
+   Return 0 on success.  Return -1 if any of the underlying calls fails; the
+   exception raised by that call is left set.
+
+   Helper function for sys_displayhook(). */
+static int
+sys_displayhook_unencodable(PyObject *outf, PyObject *o)
+{
+    PyObject *stdout_encoding = NULL;
+    PyObject *encoded = NULL;
+    PyObject *escaped_str = NULL;
+    PyObject *repr_str, *buffer, *result;
+    char *stdout_encoding_str;
+    int ret = -1;
+
+    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
+    if (stdout_encoding == NULL)
+        goto finally;
+    /* stdout_encoding is only released at the end of the function because
+       stdout_encoding_str is still used by the encode/decode calls below. */
+    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
+    if (stdout_encoding_str == NULL)
+        goto finally;
+
+    repr_str = PyObject_Repr(o);
+    if (repr_str == NULL)
+        goto finally;
+    encoded = PyUnicode_AsEncodedString(repr_str,
+                                        stdout_encoding_str,
+                                        "backslashreplace");
+    Py_DECREF(repr_str);
+    if (encoded == NULL)
+        goto finally;
+
+    buffer = PyObject_GetAttrString(outf, "buffer");
+    if (buffer != NULL) {
+        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
+        Py_DECREF(buffer);
+        if (result == NULL)
+            goto finally;
+        Py_DECREF(result);
+    }
+    else {
+        /* Only a missing attribute selects the text fallback (same test as
+           hasattr(sys.stdout, 'buffer')); any other error is propagated
+           instead of being silently discarded. */
+        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+            goto finally;
+        PyErr_Clear();
+        escaped_str = PyUnicode_FromEncodedObject(encoded,
+                                                  stdout_encoding_str,
+                                                  "strict");
+        /* Decoding can fail: never hand a NULL object to the writer or to
+           Py_DECREF(). */
+        if (escaped_str == NULL)
+            goto finally;
+        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0)
+            goto finally;
+    }
+    ret = 0;
+
+finally:
+    /* Single exit path: release whatever references are still held. */
+    Py_XDECREF(escaped_str);
+    Py_XDECREF(encoded);
+    Py_XDECREF(stdout_encoding);
+    return ret;
+}
+
static PyObject *
sys_displayhook(PyObject *self, PyObject *o)
{
@@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o)
PyInterpreterState *interp = PyThreadState_GET()->interp;
PyObject *modules = interp->modules;
PyObject *builtins = PyDict_GetItemString(modules, "builtins");
+ int err;
if (builtins == NULL) {
PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o)
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
return NULL;
}
- if (PyFile_WriteObject(o, outf, 0) != 0)
- return NULL;
+ if (PyFile_WriteObject(o, outf, 0) != 0) {
+ if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
+ /* repr(o) is not encodable to sys.stdout.encoding with
+ * sys.stdout.errors error handler (which is probably 'strict') */
+ PyErr_Clear();
+ err = sys_displayhook_unencodable(outf, o);
+ if (err)
+ return NULL;
+ }
+ else {
+ return NULL;
+ }
+ }
if (PyFile_WriteString("\n", outf) != 0)
return NULL;
if (PyObject_SetAttrString(builtins, "_", o) != 0)