summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2021-03-29 03:28:14 (GMT)
committerGitHub <noreply@github.com>2021-03-29 03:28:14 (GMT)
commit4827483f47906fecee6b5d9097df2a69a293a85c (patch)
treec4d7e34163e9583c06003d5335d020ce27aa4559 /Modules
parent261a452a1300eeeae1428ffd6e6623329c085e2c (diff)
downloadcpython-4827483f47906fecee6b5d9097df2a69a293a85c.zip
cpython-4827483f47906fecee6b5d9097df2a69a293a85c.tar.gz
cpython-4827483f47906fecee6b5d9097df2a69a293a85c.tar.bz2
bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)
See [PEP 597](https://www.python.org/dev/peps/pep-0597/). * Add `-X warn_default_encoding` and `PYTHONWARNDEFAULTENCODING`. * Add EncodingWarning. * Add io.text_encoding(). * open() and TextIOWrapper() emit EncodingWarning when encoding is omitted and warn_default_encoding is enabled. * _pyio.TextIOWrapper() uses UTF-8 as the fallback default encoding when it fails to import the locale module (this happens while building Python). * The bz2, configparser, gzip, lzma, pathlib, and tempfile modules use io.text_encoding(). * Add a What's New entry.
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_io/_iomodule.c41
-rw-r--r--Modules/_io/clinic/_iomodule.c.h48
-rw-r--r--Modules/_io/textio.c11
3 files changed, 99 insertions, 1 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 9147648..652c2ce 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -10,6 +10,7 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "_iomodule.h"
+#include "pycore_pystate.h" // _PyInterpreterState_GET()
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
@@ -33,6 +34,7 @@ PyObject *_PyIO_str_fileno = NULL;
PyObject *_PyIO_str_flush = NULL;
PyObject *_PyIO_str_getstate = NULL;
PyObject *_PyIO_str_isatty = NULL;
+PyObject *_PyIO_str_locale = NULL;
PyObject *_PyIO_str_newlines = NULL;
PyObject *_PyIO_str_nl = NULL;
PyObject *_PyIO_str_peek = NULL;
@@ -504,6 +506,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
return NULL;
}
+
+/*[clinic input]
+_io.text_encoding
+ encoding: object
+ stacklevel: int = 2
+ /
+
+A helper function to choose the text encoding.
+
+When encoding is not None, just return it.
+Otherwise, return the default text encoding (i.e. "locale").
+
+This function emits an EncodingWarning if encoding is None and
+sys.flags.warn_default_encoding is true.
+
+This can be used in APIs with an encoding=None parameter.
+However, please consider using encoding="utf-8" for new APIs.
+[clinic start generated code]*/
+
+static PyObject *
+_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
+/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
+{
+    /* Resolve the `encoding` argument of a text I/O API.
+     *
+     * Returns a new reference to `encoding` when one was given, or to the
+     * module-level "locale" string when `encoding` is NULL or None.  In the
+     * latter case an EncodingWarning is issued first if the interpreter
+     * config has warn_default_encoding set; `stacklevel` is forwarded to
+     * PyErr_WarnEx() so the warning points at the caller's caller.
+     *
+     * Returns NULL with an exception set if issuing the warning fails.
+     */
+    if (encoding == NULL || encoding == Py_None) {
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
+            /* PyErr_WarnEx() returns -1 with an exception set when the
+             * warning is turned into an error (e.g. -W error).  Propagate
+             * that instead of returning a value with an exception pending. */
+            if (PyErr_WarnEx(PyExc_EncodingWarning,
+                             "'encoding' argument not specified",
+                             stacklevel) < 0) {
+                return NULL;
+            }
+        }
+        Py_INCREF(_PyIO_str_locale);
+        return _PyIO_str_locale;
+    }
+    Py_INCREF(encoding);
+    return encoding;
+}
+
+
/*[clinic input]
_io.open_code
@@ -629,6 +668,7 @@ iomodule_free(PyObject *mod) {
static PyMethodDef module_methods[] = {
_IO_OPEN_METHODDEF
+ _IO_TEXT_ENCODING_METHODDEF
_IO_OPEN_CODE_METHODDEF
{NULL, NULL}
};
@@ -747,6 +787,7 @@ PyInit__io(void)
ADD_INTERNED(flush)
ADD_INTERNED(getstate)
ADD_INTERNED(isatty)
+ ADD_INTERNED(locale)
ADD_INTERNED(newlines)
ADD_INTERNED(peek)
ADD_INTERNED(read)
diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h
index dc7b5ff..91c55b1 100644
--- a/Modules/_io/clinic/_iomodule.c.h
+++ b/Modules/_io/clinic/_iomodule.c.h
@@ -272,6 +272,52 @@ exit:
return return_value;
}
+PyDoc_STRVAR(_io_text_encoding__doc__,
+"text_encoding($module, encoding, stacklevel=2, /)\n"
+"--\n"
+"\n"
+"A helper function to choose the text encoding.\n"
+"\n"
+"When encoding is not None, just return it.\n"
+"Otherwise, return the default text encoding (i.e. \"locale\").\n"
+"\n"
+"This function emits an EncodingWarning if encoding is None and\n"
+"sys.flags.warn_default_encoding is true.\n"
+"\n"
+"This can be used in APIs with an encoding=None parameter.\n"
+"However, please consider using encoding=\"utf-8\" for new APIs.");
+
+#define _IO_TEXT_ENCODING_METHODDEF \
+ {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__},
+
+static PyObject *
+_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel);
+
+static PyObject *
+_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{ /* Entry point registered for "text_encoding": unpacks the positional arguments and delegates to _io_text_encoding_impl(). */
+ PyObject *return_value = NULL; /* stays NULL on every path that jumps to exit before the impl call */
+ PyObject *encoding;
+ int stacklevel = 2; /* value used when only one argument is passed */
+
+ if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) { /* presumably enforces 1..2 positional arguments -- confirm against the helper */
+ goto exit;
+ }
+ encoding = args[0];
+ if (nargs < 2) { /* stacklevel omitted: keep the default */
+ goto skip_optional;
+ }
+ stacklevel = _PyLong_AsInt(args[1]);
+ if (stacklevel == -1 && PyErr_Occurred()) { /* -1 is only an error if an exception is pending; otherwise it is a real value */
+ goto exit;
+ }
+skip_optional:
+ return_value = _io_text_encoding_impl(module, encoding, stacklevel);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(_io_open_code__doc__,
"open_code($module, /, path)\n"
"--\n"
@@ -313,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
exit:
return return_value;
}
-/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/
+/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index 03001ec..6f89a87 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1124,6 +1124,17 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
self->b2cratio = 0.0;
if (encoding == NULL) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
+ PyErr_WarnEx(PyExc_EncodingWarning,
+ "'encoding' argument not specified", 1);
+ }
+ }
+ else if (strcmp(encoding, "locale") == 0) {
+ encoding = NULL;
+ }
+
+ if (encoding == NULL) {
/* Try os.device_encoding(fileno) */
PyObject *fileno;
state = IO_STATE();