diff options
author | Inada Naoki <songofacandy@gmail.com> | 2021-03-29 03:28:14 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-29 03:28:14 (GMT) |
commit | 4827483f47906fecee6b5d9097df2a69a293a85c (patch) | |
tree | c4d7e34163e9583c06003d5335d020ce27aa4559 /Modules | |
parent | 261a452a1300eeeae1428ffd6e6623329c085e2c (diff) | |
download | cpython-4827483f47906fecee6b5d9097df2a69a293a85c.zip cpython-4827483f47906fecee6b5d9097df2a69a293a85c.tar.gz cpython-4827483f47906fecee6b5d9097df2a69a293a85c.tar.bz2 |
bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)
See [PEP 597](https://www.python.org/dev/peps/pep-0597/).
* Add `-X warn_default_encoding` and `PYTHONWARNDEFAULTENCODING`.
* Add EncodingWarning
* Add io.text_encoding()
* open(), TextIOWrapper() emits EncodingWarning when encoding is omitted and warn_default_encoding is enabled.
* _pyio.TextIOWrapper() uses UTF-8 as fallback default encoding used when failed to import locale module. (used during building Python)
* bz2, configparser, gzip, lzma, pathlib, tempfile modules use io.text_encoding().
* What's new entry
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_io/_iomodule.c | 41 | ||||
-rw-r--r-- | Modules/_io/clinic/_iomodule.c.h | 48 | ||||
-rw-r--r-- | Modules/_io/textio.c | 11 |
3 files changed, 99 insertions, 1 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 9147648..652c2ce 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -10,6 +10,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "_iomodule.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> @@ -33,6 +34,7 @@ PyObject *_PyIO_str_fileno = NULL; PyObject *_PyIO_str_flush = NULL; PyObject *_PyIO_str_getstate = NULL; PyObject *_PyIO_str_isatty = NULL; +PyObject *_PyIO_str_locale = NULL; PyObject *_PyIO_str_newlines = NULL; PyObject *_PyIO_str_nl = NULL; PyObject *_PyIO_str_peek = NULL; @@ -504,6 +506,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, return NULL; } + +/*[clinic input] +_io.text_encoding + encoding: object + stacklevel: int = 2 + / + +A helper function to choose the text encoding. + +When encoding is not None, just return it. +Otherwise, return the default text encoding (i.e. "locale"). + +This function emits an EncodingWarning if encoding is None and +sys.flags.warn_default_encoding is true. + +This can be used in APIs with an encoding=None parameter. +However, please consider using encoding="utf-8" for new APIs. +[clinic start generated code]*/ + +static PyObject * +_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) +/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/ +{ + if (encoding == NULL || encoding == Py_None) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { + PyErr_WarnEx(PyExc_EncodingWarning, + "'encoding' argument not specified", stacklevel); + } + Py_INCREF(_PyIO_str_locale); + return _PyIO_str_locale; + } + Py_INCREF(encoding); + return encoding; +} + + /*[clinic input] _io.open_code @@ -629,6 +668,7 @@ iomodule_free(PyObject *mod) { static PyMethodDef module_methods[] = { _IO_OPEN_METHODDEF + _IO_TEXT_ENCODING_METHODDEF _IO_OPEN_CODE_METHODDEF {NULL, NULL} }; @@ -747,6 +787,7 @@ PyInit__io(void) ADD_INTERNED(flush) ADD_INTERNED(getstate) ADD_INTERNED(isatty) + ADD_INTERNED(locale) ADD_INTERNED(newlines) ADD_INTERNED(peek) ADD_INTERNED(read) diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index dc7b5ff..91c55b1 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -272,6 +272,52 @@ exit: return return_value; } +PyDoc_STRVAR(_io_text_encoding__doc__, +"text_encoding($module, encoding, stacklevel=2, /)\n" +"--\n" +"\n" +"A helper function to choose the text encoding.\n" +"\n" +"When encoding is not None, just return it.\n" +"Otherwise, return the default text encoding (i.e. \"locale\").\n" +"\n" +"This function emits an EncodingWarning if encoding is None and\n" +"sys.flags.warn_default_encoding is true.\n" +"\n" +"This can be used in APIs with an encoding=None parameter.\n" +"However, please consider using encoding=\"utf-8\" for new APIs."); + +#define _IO_TEXT_ENCODING_METHODDEF \ + {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__}, + +static PyObject * +_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel); + +static PyObject * +_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *encoding; + int stacklevel = 2; + + if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) { + goto exit; + } + encoding = args[0]; + if (nargs < 2) { + goto skip_optional; + } + stacklevel = _PyLong_AsInt(args[1]); + if (stacklevel == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + return_value = _io_text_encoding_impl(module, encoding, stacklevel); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_open_code__doc__, "open_code($module, /, path)\n" "--\n" @@ -313,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/ diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 03001ec..6f89a87 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1124,6 +1124,17 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, self->b2cratio = 0.0; if (encoding == NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { + PyErr_WarnEx(PyExc_EncodingWarning, + "'encoding' argument not specified", 1); + } + } + else if (strcmp(encoding, "locale") == 0) { + encoding = NULL; + } + + if (encoding == NULL) { /* Try os.device_encoding(fileno) */ PyObject *fileno; state = IO_STATE(); |