summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/io.rst11
-rw-r--r--Include/internal/pycore_global_strings.h1
-rw-r--r--Include/internal/pycore_runtime_init.h1
-rw-r--r--Lib/_pyio.py10
-rw-r--r--Lib/test/test_io.py11
-rw-r--r--Lib/test/test_utf8_mode.py6
-rw-r--r--Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst1
-rw-r--r--Modules/_io/_iomodule.c16
-rw-r--r--Modules/_io/clinic/_iomodule.c.h7
-rw-r--r--Python/sysmodule.c5
10 files changed, 52 insertions, 17 deletions
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index d512334..80107d5 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -198,12 +198,13 @@ High-level Module Interface
This is a helper function for callables that use :func:`open` or
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
- This function returns *encoding* if it is not ``None`` and ``"locale"`` if
- *encoding* is ``None``.
+ This function returns *encoding* if it is not ``None``.
+ Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
+ :ref:`UTF-8 Mode <utf8-mode>`.
This function emits an :class:`EncodingWarning` if
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
- is None. *stacklevel* specifies where the warning is emitted.
+ is ``None``. *stacklevel* specifies where the warning is emitted.
For example::
def read_text(path, encoding=None):
@@ -218,6 +219,10 @@ High-level Module Interface
.. versionadded:: 3.10
+ .. versionchanged:: 3.11
+ :func:`text_encoding` returns ``"utf-8"`` when UTF-8 mode is enabled and
+ *encoding* is ``None``.
+
.. exception:: BlockingIOError
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 3e533fd..833ff27 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -48,6 +48,7 @@ struct _Py_global_strings {
STRUCT_FOR_STR(newline, "\n")
STRUCT_FOR_STR(open_br, "{")
STRUCT_FOR_STR(percent, "%")
+ STRUCT_FOR_STR(utf_8, "utf-8")
} literals;
struct {
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index d5690d8..fd925b3 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -672,6 +672,7 @@ extern "C" {
INIT_STR(newline, "\n"), \
INIT_STR(open_br, "{"), \
INIT_STR(percent, "%"), \
+ INIT_STR(utf_8, "utf-8"), \
}, \
.identifiers = { \
INIT_ID(False), \
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index fd00d65..e3ff59e 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
"""
A helper function to choose the text encoding.
- When encoding is not None, just return it.
- Otherwise, return the default text encoding (i.e. "locale").
+ When encoding is not None, this function returns it.
+ Otherwise, this function returns the default text encoding
+ (i.e. "locale" or "utf-8" depends on UTF-8 mode).
This function emits an EncodingWarning if *encoding* is None and
sys.flags.warn_default_encoding is true.
@@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
However, please consider using encoding="utf-8" for new APIs.
"""
if encoding is None:
- encoding = "locale"
+ if sys.flags.utf8_mode:
+ encoding = "utf-8"
+ else:
+ encoding = "locale"
if sys.flags.warn_default_encoding:
import warnings
warnings.warn("'encoding' argument not specified.",
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 2d0ca87..67be108 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -4289,6 +4289,17 @@ class MiscIOTest(unittest.TestCase):
self.assertTrue(
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
+ def test_text_encoding(self):
+ # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
+ # based on sys.flags.utf8_mode
+ code = "import io; print(io.text_encoding(None))"
+
+ proc = assert_python_ok('-X', 'utf8=0', '-c', code)
+ self.assertEqual(b"locale", proc.out.strip())
+
+ proc = assert_python_ok('-X', 'utf8=1', '-c', code)
+ self.assertEqual(b"utf-8", proc.out.strip())
+
@support.cpython_only
# Depending if OpenWrapper was already created or not, the warning is
# emitted or not. For example, the attribute is already created when this
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 2b96f76..308e8e8 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -161,7 +161,7 @@ class UTF8ModeTests(unittest.TestCase):
filename = __file__
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
- self.assertEqual(out, 'UTF-8/strict')
+ self.assertEqual(out.lower(), 'utf-8/strict')
def _check_io_encoding(self, module, encoding=None, errors=None):
filename = __file__
@@ -183,10 +183,10 @@ class UTF8ModeTests(unittest.TestCase):
PYTHONUTF8='1')
if not encoding:
- encoding = 'UTF-8'
+ encoding = 'utf-8'
if not errors:
errors = 'strict'
- self.assertEqual(out, f'{encoding}/{errors}')
+ self.assertEqual(out.lower(), f'{encoding}/{errors}')
def check_io_encoding(self, module):
self._check_io_encoding(module, encoding="latin1")
diff --git a/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
new file mode 100644
index 0000000..f96b6e6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
@@ -0,0 +1 @@
+Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 7f029f2..065f5e2 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -457,8 +457,9 @@ _io.text_encoding
A helper function to choose the text encoding.
-When encoding is not None, just return it.
-Otherwise, return the default text encoding (i.e. "locale").
+When encoding is not None, this function returns it.
+Otherwise, this function returns the default text encoding
+(i.e. "locale" or "utf-8" depends on UTF-8 mode).
This function emits an EncodingWarning if encoding is None and
sys.flags.warn_default_encoding is true.
@@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
-/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
+/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
{
if (encoding == NULL || encoding == Py_None) {
PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
return NULL;
}
}
- return &_Py_ID(locale);
+ const PyPreConfig *preconfig = &_PyRuntime.preconfig;
+ if (preconfig->utf8_mode) {
+ _Py_DECLARE_STR(utf_8, "utf-8");
+ encoding = &_Py_STR(utf_8);
+ }
+ else {
+ encoding = &_Py_ID(locale);
+ }
}
Py_INCREF(encoding);
return encoding;
diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h
index d5fb176..e4a6b8c 100644
--- a/Modules/_io/clinic/_iomodule.c.h
+++ b/Modules/_io/clinic/_iomodule.c.h
@@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__,
"\n"
"A helper function to choose the text encoding.\n"
"\n"
-"When encoding is not None, just return it.\n"
-"Otherwise, return the default text encoding (i.e. \"locale\").\n"
+"When encoding is not None, this function returns it.\n"
+"Otherwise, this function returns the default text encoding\n"
+"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n"
"\n"
"This function emits an EncodingWarning if encoding is None and\n"
"sys.flags.warn_default_encoding is true.\n"
@@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
exit:
return return_value;
}
-/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/
+/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 5765e9e..de4e10a 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -841,7 +841,10 @@ static PyObject *
sys_getdefaultencoding_impl(PyObject *module)
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
{
- return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
+ _Py_DECLARE_STR(utf_8, "utf-8");
+ PyObject *ret = &_Py_STR(utf_8);
+ Py_INCREF(ret);
+ return ret;
}
/*[clinic input]