summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/_bootlocale.py 6
-rw-r--r-- Lib/encodings/__init__.py 5
-rw-r--r-- Lib/locale.py 6
-rw-r--r-- Lib/subprocess.py 2
-rw-r--r-- Lib/test/test_builtin.py 1
-rw-r--r-- Lib/test/test_c_locale_coercion.py 2
-rw-r--r-- Lib/test/test_codecs.py 10
-rw-r--r-- Lib/test/test_io.py 2
-rw-r--r-- Lib/test/test_sys.py 8
-rw-r--r-- Lib/test/test_utf8_mode.py 206
10 files changed, 233 insertions, 15 deletions
diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py
index 0c61b0d..3273a3b 100644
--- a/Lib/_bootlocale.py
+++ b/Lib/_bootlocale.py
@@ -9,6 +9,8 @@ import _locale
if sys.platform.startswith("win"):
def getpreferredencoding(do_setlocale=True):
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
return _locale._getdefaultlocale()[1]
else:
try:
@@ -21,6 +23,8 @@ else:
return 'UTF-8'
else:
def getpreferredencoding(do_setlocale=True):
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
# This path for legacy systems needs the more complex
# getdefaultlocale() function, import the full locale module.
import locale
@@ -28,6 +32,8 @@ else:
else:
def getpreferredencoding(do_setlocale=True):
assert not do_setlocale
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
result = _locale.nl_langinfo(_locale.CODESET)
if not result and sys.platform == 'darwin':
# nl_langinfo can return an empty string
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index aa2fb7c..025b7a8 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -158,8 +158,9 @@ codecs.register(search_function)
if sys.platform == 'win32':
def _alias_mbcs(encoding):
try:
- import _bootlocale
- if encoding == _bootlocale.getpreferredencoding(False):
+ import _winapi
+ ansi_code_page = "cp%s" % _winapi.GetACP()
+ if encoding == ansi_code_page:
import encodings.mbcs
return encodings.mbcs.getregentry()
except ImportError:
diff --git a/Lib/locale.py b/Lib/locale.py
index f1d157d..18079e7 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -617,6 +617,8 @@ if sys.platform.startswith("win"):
# On Win32, this will return the ANSI code page
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
import _bootlocale
return _bootlocale.getpreferredencoding(False)
else:
@@ -634,6 +636,8 @@ else:
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
by looking at environment variables."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
res = getdefaultlocale()[1]
if res is None:
# LANG not set, default conservatively to ASCII
@@ -643,6 +647,8 @@ else:
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
according to the system configuration."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
import _bootlocale
if do_setlocale:
oldloc = setlocale(LC_CTYPE)
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 301433c..65b4086 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -280,7 +280,7 @@ def _args_from_interpreter_flags():
if dev_mode:
args.extend(('-X', 'dev'))
for opt in ('faulthandler', 'tracemalloc', 'importtime',
- 'showalloccount', 'showrefcount'):
+ 'showalloccount', 'showrefcount', 'utf8'):
if opt in xoptions:
value = xoptions[opt]
if value is True:
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 0a61c05..9329318 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -1022,6 +1022,7 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(ValueError, open, 'a\x00b')
self.assertRaises(ValueError, open, b'a\x00b')
+ @unittest.skipIf(sys.flags.utf8_mode, "utf-8 mode is enabled")
def test_open_default_encoding(self):
old_environ = dict(os.environ)
try:
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index 2a22739..c0845d7 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -130,7 +130,7 @@ class EncodingDetails(_EncodingDetails):
that.
"""
result, py_cmd = run_python_until_end(
- "-c", cls.CHILD_PROCESS_SCRIPT,
+ "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
__isolated=True,
**env_vars
)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index eb21a39..a59a5e2 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -5,6 +5,7 @@ import locale
import sys
import unittest
import encodings
+from unittest import mock
from test import support
@@ -3180,16 +3181,9 @@ class CodePageTest(unittest.TestCase):
def test_mbcs_alias(self):
# Check that looking up our 'default' codepage will return
# mbcs when we don't have a more specific one available
- import _bootlocale
- def _get_fake_codepage(*a):
- return 'cp123'
- old_getpreferredencoding = _bootlocale.getpreferredencoding
- _bootlocale.getpreferredencoding = _get_fake_codepage
- try:
+ with mock.patch('_winapi.GetACP', return_value=123):
codec = codecs.lookup('cp123')
self.assertEqual(codec.name, 'mbcs')
- finally:
- _bootlocale.getpreferredencoding = old_getpreferredencoding
class ASCIITest(unittest.TestCase):
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 6bb4127..6674831 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2580,6 +2580,7 @@ class TextIOWrapperTest(unittest.TestCase):
t.reconfigure(line_buffering=None)
self.assertEqual(t.line_buffering, True)
+ @unittest.skipIf(sys.flags.utf8_mode, "utf-8 mode is enabled")
def test_default_encoding(self):
old_environ = dict(os.environ)
try:
@@ -2599,6 +2600,7 @@ class TextIOWrapperTest(unittest.TestCase):
os.environ.update(old_environ)
@support.cpython_only
+ @unittest.skipIf(sys.flags.utf8_mode, "utf-8 mode is enabled")
def test_device_encoding(self):
# Issue 15989
import _testcapi
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 6346094..6933b41 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -527,7 +527,7 @@ class SysModuleTest(unittest.TestCase):
"inspect", "interactive", "optimize", "dont_write_bytecode",
"no_user_site", "no_site", "ignore_environment", "verbose",
"bytes_warning", "quiet", "hash_randomization", "isolated",
- "dev_mode")
+ "dev_mode", "utf8_mode")
for attr in attrs:
self.assertTrue(hasattr(sys.flags, attr), attr)
attr_type = bool if attr == "dev_mode" else int
@@ -535,6 +535,8 @@ class SysModuleTest(unittest.TestCase):
self.assertTrue(repr(sys.flags))
self.assertEqual(len(sys.flags), len(attrs))
+ self.assertIn(sys.flags.utf8_mode, {0, 1, 2})
+
def assert_raise_on_new_sys_type(self, sys_attr):
# Users are intentionally prevented from creating new instances of
# sys.flags, sys.version_info, and sys.getwindowsversion.
@@ -710,8 +712,8 @@ class SysModuleTest(unittest.TestCase):
# have no any effect
out = self.c_locale_get_error_handler(encoding=':')
self.assertEqual(out,
- 'stdin: surrogateescape\n'
- 'stdout: surrogateescape\n'
+ 'stdin: strict\n'
+ 'stdout: strict\n'
'stderr: backslashreplace\n')
out = self.c_locale_get_error_handler(encoding='')
self.assertEqual(out,
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
new file mode 100644
index 0000000..275a6ea
--- /dev/null
+++ b/Lib/test/test_utf8_mode.py
@@ -0,0 +1,206 @@
+"""
+Test the implementation of PEP 540: the UTF-8 Mode.
+"""
+
+import locale
+import os
+import sys
+import textwrap
+import unittest
+from test.support.script_helper import assert_python_ok, assert_python_failure
+
+
+MS_WINDOWS = (sys.platform == 'win32')
+
+
+class UTF8ModeTests(unittest.TestCase):
+ # Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
+ # variables by default
+ DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
+
+ def posix_locale(self):
+ loc = locale.setlocale(locale.LC_CTYPE, None)
+ return (loc == 'C')
+
+ def get_output(self, *args, failure=False, **kw):
+ kw = dict(self.DEFAULT_ENV, **kw)
+ if failure:
+ out = assert_python_failure(*args, **kw)
+ out = out[2]
+ else:
+ out = assert_python_ok(*args, **kw)
+ out = out[1]
+ return out.decode().rstrip("\n\r")
+
+ @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
+ def test_posix_locale(self):
+ code = 'import sys; print(sys.flags.utf8_mode)'
+
+ out = self.get_output('-c', code, LC_ALL='C')
+ self.assertEqual(out, '1')
+
+ def test_xoption(self):
+ code = 'import sys; print(sys.flags.utf8_mode)'
+
+ out = self.get_output('-X', 'utf8', '-c', code)
+ self.assertEqual(out, '1')
+
+ # undocumented but accepted syntax: -X utf8=1
+ out = self.get_output('-X', 'utf8=1', '-c', code)
+ self.assertEqual(out, '1')
+
+ out = self.get_output('-X', 'utf8=0', '-c', code)
+ self.assertEqual(out, '0')
+
+ if MS_WINDOWS:
+ # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+ # and has the priority over -X utf8
+ out = self.get_output('-X', 'utf8', '-c', code,
+ PYTHONLEGACYWINDOWSFSENCODING='1')
+ self.assertEqual(out, '0')
+
+ def test_env_var(self):
+ code = 'import sys; print(sys.flags.utf8_mode)'
+
+ out = self.get_output('-c', code, PYTHONUTF8='1')
+ self.assertEqual(out, '1')
+
+ out = self.get_output('-c', code, PYTHONUTF8='0')
+ self.assertEqual(out, '0')
+
+ # -X utf8 has the priority over PYTHONUTF8
+ out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
+ self.assertEqual(out, '0')
+
+ if MS_WINDOWS:
+ # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+ # and has the priority over PYTHONUTF8
+ out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
+ PYTHONLEGACYWINDOWSFSENCODING='1')
+ self.assertEqual(out, '0')
+
+ # Cannot test with the POSIX locale, since the POSIX locale enables
+ # the UTF-8 mode
+ if not self.posix_locale():
+ # PYTHONUTF8 should be ignored if -E is used
+ out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
+ self.assertEqual(out, '0')
+
+ # invalid mode
+ out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
+ self.assertIn('invalid PYTHONUTF8 environment variable value',
+ out.rstrip())
+
+ def test_filesystemencoding(self):
+ code = textwrap.dedent('''
+ import sys
+ print("{}/{}".format(sys.getfilesystemencoding(),
+ sys.getfilesystemencodeerrors()))
+ ''')
+
+ if MS_WINDOWS:
+ expected = 'utf-8/surrogatepass'
+ else:
+ expected = 'utf-8/surrogateescape'
+
+ out = self.get_output('-X', 'utf8', '-c', code)
+ self.assertEqual(out, expected)
+
+ if MS_WINDOWS:
+ # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+ # and has the priority over -X utf8 and PYTHONUTF8
+ out = self.get_output('-X', 'utf8', '-c', code,
+ PYTHONUTF8='strict',
+ PYTHONLEGACYWINDOWSFSENCODING='1')
+ self.assertEqual(out, 'mbcs/replace')
+
+ def test_stdio(self):
+ code = textwrap.dedent('''
+ import sys
+ print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
+ print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
+ print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
+ ''')
+
+ out = self.get_output('-X', 'utf8', '-c', code,
+ PYTHONIOENCODING='')
+ self.assertEqual(out.splitlines(),
+ ['stdin: utf-8/surrogateescape',
+ 'stdout: utf-8/surrogateescape',
+ 'stderr: utf-8/backslashreplace'])
+
+ # PYTHONIOENCODING has the priority over the UTF-8 mode (-X utf8)
+ out = self.get_output('-X', 'utf8', '-c', code,
+ PYTHONIOENCODING="latin1")
+ self.assertEqual(out.splitlines(),
+ ['stdin: latin1/strict',
+ 'stdout: latin1/strict',
+ 'stderr: latin1/backslashreplace'])
+
+ out = self.get_output('-X', 'utf8', '-c', code,
+ PYTHONIOENCODING=":namereplace")
+ self.assertEqual(out.splitlines(),
+ ['stdin: UTF-8/namereplace',
+ 'stdout: UTF-8/namereplace',
+ 'stderr: UTF-8/backslashreplace'])
+
+ def test_io(self):
+ code = textwrap.dedent('''
+ import sys
+ filename = sys.argv[1]
+ with open(filename) as fp:
+ print(f"{fp.encoding}/{fp.errors}")
+ ''')
+ filename = __file__
+
+ out = self.get_output('-c', code, filename, PYTHONUTF8='1')
+ self.assertEqual(out, 'UTF-8/strict')
+
+ def _check_io_encoding(self, module, encoding=None, errors=None):
+ filename = __file__
+
+ # Encoding explicitly set
+ args = []
+ if encoding:
+ args.append(f'encoding={encoding!r}')
+ if errors:
+ args.append(f'errors={errors!r}')
+ code = textwrap.dedent('''
+ import sys
+ from %s import open
+ filename = sys.argv[1]
+ with open(filename, %s) as fp:
+ print(f"{fp.encoding}/{fp.errors}")
+ ''') % (module, ', '.join(args))
+ out = self.get_output('-c', code, filename,
+ PYTHONUTF8='1')
+
+ if not encoding:
+ encoding = 'UTF-8'
+ if not errors:
+ errors = 'strict'
+ self.assertEqual(out, f'{encoding}/{errors}')
+
+ def check_io_encoding(self, module):
+ self._check_io_encoding(module, encoding="latin1")
+ self._check_io_encoding(module, errors="namereplace")
+ self._check_io_encoding(module,
+ encoding="latin1", errors="namereplace")
+
+ def test_io_encoding(self):
+ self.check_io_encoding('io')
+
+ def test_pyio_encoding(self):  # distinct name so the 'io' variant is not shadowed
+ self.check_io_encoding('_pyio')  # same checks against the pure-Python _pyio.open()
+
+ def test_locale_getpreferredencoding(self):
+ code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
+ out = self.get_output('-X', 'utf8', '-c', code)
+ self.assertEqual(out, 'UTF-8 UTF-8')
+
+ out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
+ self.assertEqual(out, 'UTF-8 UTF-8')
+
+
+if __name__ == "__main__":
+ unittest.main()