1 files changed, 206 insertions, 0 deletions
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
new file mode 100644
index 0000000..275a6ea
--- /dev/null
+++ b/Lib/test/test_utf8_mode.py
@@ -0,0 +1,206 @@
+"""
+Test the implementation of the PEP 540: the UTF-8 Mode.
+"""
+
+import locale
+import os
+import sys
+import textwrap
+import unittest
+from test.support.script_helper import assert_python_ok, assert_python_failure
+
+
+MS_WINDOWS = (sys.platform == 'win32')
+
+
+class UTF8ModeTests(unittest.TestCase):
+    # Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment
+    # variables by default
+    DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''}
+
+    def posix_locale(self):
+        loc = locale.setlocale(locale.LC_CTYPE, None)
+        return (loc == 'C')
+
+    def get_output(self, *args, failure=False, **kw):
+        kw = dict(self.DEFAULT_ENV, **kw)
+        if failure:
+            out = assert_python_failure(*args, **kw)
+            out = out[2]
+        else:
+            out = assert_python_ok(*args, **kw)
+            out = out[1]
+        return out.decode().rstrip("\n\r")
+
+    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
+    def test_posix_locale(self):
+        code = 'import sys; print(sys.flags.utf8_mode)'
+
+        out = self.get_output('-c', code, LC_ALL='C')
+        self.assertEqual(out, '1')
+
+    def test_xoption(self):
+        code = 'import sys; print(sys.flags.utf8_mode)'
+
+        out = self.get_output('-X', 'utf8', '-c', code)
+        self.assertEqual(out, '1')
+
+        # undocumented but accepted syntax: -X utf8=1
+        out = self.get_output('-X', 'utf8=1', '-c', code)
+        self.assertEqual(out, '1')
+
+        out = self.get_output('-X', 'utf8=0', '-c', code)
+        self.assertEqual(out, '0')
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8
+            # and has the priority over -X utf8
+            out = self.get_output('-X', 'utf8', '-c', code,
+                                  PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(out, '0')
+
+    def test_env_var(self):
+        code = 'import sys; print(sys.flags.utf8_mode)'
+
+        out = self.get_output('-c', code, PYTHONUTF8='1')
+        self.assertEqual(out, '1')
+
+        out = self.get_output('-c', code, PYTHONUTF8='0')
+        self.assertEqual(out, '0')
+
+        # -X utf8 has the priority over PYTHONUTF8
+        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
+        self.assertEqual(out, '0')
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+            # and has the priority over PYTHONUTF8
+            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
+                                  PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(out, '0')
+
+        # Cannot test with the POSIX locale, since the POSIX locale enables
+        # the UTF-8 mode
+        if not self.posix_locale():
+            # PYTHONUTF8 should be ignored if -E is used
+            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
+            self.assertEqual(out, '0')
+
+        # invalid mode
+        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
+        self.assertIn('invalid PYTHONUTF8 environment variable value',
+                      out.rstrip())
+
+    def test_filesystemencoding(self):
+        code = textwrap.dedent('''
+            import sys
+            print("{}/{}".format(sys.getfilesystemencoding(),
+                                 sys.getfilesystemencodeerrors()))
+        ''')
+
+        if MS_WINDOWS:
+            expected = 'utf-8/surrogatepass'
+        else:
+            expected = 'utf-8/surrogateescape'
+
+        out = self.get_output('-X', 'utf8', '-c', code)
+        self.assertEqual(out, expected)
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+            # and has the priority over -X utf8 and PYTHONUTF8
+            out = self.get_output('-X', 'utf8', '-c', code,
+                                  PYTHONUTF8='strict',
+                                  PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(out, 'mbcs/replace')
+
+    def test_stdio(self):
+        code = textwrap.dedent('''
+            import sys
+            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
+            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
+            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
+        ''')
+
+        out = self.get_output('-X', 'utf8', '-c', code,
+                              PYTHONIOENCODING='')
+        self.assertEqual(out.splitlines(),
+                         ['stdin: utf-8/surrogateescape',
+                          'stdout: utf-8/surrogateescape',
+                          'stderr: utf-8/backslashreplace'])
+
+        # PYTHONIOENCODING has the priority over PYTHONUTF8
+        out = self.get_output('-X', 'utf8', '-c', code,
+                              PYTHONIOENCODING="latin1")
+        self.assertEqual(out.splitlines(),
+                         ['stdin: latin1/strict',
+                          'stdout: latin1/strict',
+                          'stderr: latin1/backslashreplace'])
+
+        out = self.get_output('-X', 'utf8', '-c', code,
+                              PYTHONIOENCODING=":namereplace")
+        self.assertEqual(out.splitlines(),
+                         ['stdin: UTF-8/namereplace',
+                          'stdout: UTF-8/namereplace',
+                          'stderr: UTF-8/backslashreplace'])
+
+    def test_io(self):
+        code = textwrap.dedent('''
+            import sys
+            filename = sys.argv[1]
+            with open(filename) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''')
+        filename = __file__
+
+        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
+        self.assertEqual(out, 'UTF-8/strict')
+
+    def _check_io_encoding(self, module, encoding=None, errors=None):
+        filename = __file__
+
+        # Encoding explicitly set
+        args = []
+        if encoding:
+            args.append(f'encoding={encoding!r}')
+        if errors:
+            args.append(f'errors={errors!r}')
+        code = textwrap.dedent('''
+            import sys
+            from %s import open
+            filename = sys.argv[1]
+            with open(filename, %s) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''') % (module, ', '.join(args))
+        out = self.get_output('-c', code, filename,
+                              PYTHONUTF8='1')
+
+        if not encoding:
+            encoding = 'UTF-8'
+        if not errors:
+            errors = 'strict'
+        self.assertEqual(out, f'{encoding}/{errors}')
+
+    def check_io_encoding(self, module):
+        self._check_io_encoding(module, encoding="latin1")
+        self._check_io_encoding(module, errors="namereplace")
+        self._check_io_encoding(module,
+                                encoding="latin1", errors="namereplace")
+
+    def test_io_encoding(self):
+        self.check_io_encoding('io')
+
+    def test_io_encoding(self):
+        self.check_io_encoding('_pyio')
+
+    def test_locale_getpreferredencoding(self):
+        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
+        out = self.get_output('-X', 'utf8', '-c', code)
+        self.assertEqual(out, 'UTF-8 UTF-8')
+
+        out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
+        self.assertEqual(out, 'UTF-8 UTF-8')
+
+
+if __name__ == "__main__":
+    unittest.main()