summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com> 2019-06-25 22:51:05 (GMT)
committer: GitHub <noreply@github.com> 2019-06-25 22:51:05 (GMT)
commit: 22eb689cf3de7972a2789db3ad01a86949508ab7 (patch)
tree: a1d63fa4cf235008e73f92a18ebef57be54ce4a5 /Modules
parent: e1a63c4f21011a3ae77dff624196561070c83446 (diff)
downloadcpython-22eb689cf3de7972a2789db3ad01a86949508ab7.zip
cpython-22eb689cf3de7972a2789db3ad01a86949508ab7.tar.gz
cpython-22eb689cf3de7972a2789db3ad01a86949508ab7.tar.bz2
bpo-37388: Development mode check encoding and errors (GH-14341)
In development mode and in debug builds, the encoding and errors arguments are now checked on string encoding and decoding operations. Examples: open(), str.encode() and bytes.decode(). By default, for best performance, the errors argument is only checked at the first encoding/decoding error, and the encoding argument is sometimes ignored for empty strings.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_io/textio.c 43
1 files changed, 43 insertions, 0 deletions
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 73b2756..021231e 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -988,6 +988,46 @@ _textiowrapper_fix_encoder_state(textio *self)
return 0;
}
+static int
+io_check_errors(PyObject *errors) /* Validate that 'errors' names a known codec error handler; returns 0 when accepted or when the check is skipped, -1 otherwise. */
+{
+ assert(errors != NULL && errors != Py_None); /* caller must pass a real object, never NULL or None */
+
+ PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
+#ifndef Py_DEBUG
+ /* Release build: the check only runs in development mode (-X dev);
+    otherwise report success without looking at 'errors'. */
+ if (!interp->config.dev_mode) {
+ return 0;
+ }
+#else
+ /* Debug build: always check, regardless of dev_mode. */
+#endif
+
+ /* Skip the check while interp->fs_codec.encoding is unset: avoid calling
+ PyCodec_LookupError() before the codec registry is ready, i.e. before _PyUnicode_InitEncodings() is called. */
+ if (!interp->fs_codec.encoding) {
+ return 0;
+ }
+
+ Py_ssize_t name_length;
+ const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
+ if (name == NULL) {
+ return -1; /* could not get a UTF-8 representation of 'errors' */
+ }
+ if (strlen(name) != (size_t)name_length) { /* C length differs from the reported size: a NUL sits inside the name */
+ PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
+ return -1;
+ }
+ PyObject *handler = PyCodec_LookupError(name);
+ if (handler != NULL) {
+ Py_DECREF(handler); /* only the existence of the handler matters; release it */
+ return 0;
+ }
+ return -1; /* lookup returned NULL; NOTE(review): presumably PyCodec_LookupError() has set the exception - confirm */
+}
+
+
+
/*[clinic input]
_io.TextIOWrapper.__init__
buffer: object
@@ -1057,6 +1097,9 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
errors->ob_type->tp_name);
return -1;
}
+ else if (io_check_errors(errors)) {
+ return -1;
+ }
if (validate_newline(newline) < 0) {
return -1;