diff options
author | Nick Coghlan <ncoghlan@gmail.com> | 2017-06-11 03:16:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-11 03:16:15 (GMT) |
commit | 6ea4186de32d65b1f1dc1533b6312b798d300466 (patch) | |
tree | bac8387f2da20d5b6d5f74bb09fb5708d1fbd61e /Lib/test/test_c_locale_coercion.py | |
parent | 0afbabe245e2eb6f7cef3b70531d69f2a7ad8295 (diff) | |
download | cpython-6ea4186de32d65b1f1dc1533b6312b798d300466.zip cpython-6ea4186de32d65b1f1dc1533b6312b798d300466.tar.gz cpython-6ea4186de32d65b1f1dc1533b6312b798d300466.tar.bz2 |
bpo-28180: Implementation for PEP 538 (#659)
- new PYTHONCOERCECLOCALE config setting
- coerces legacy C locale to C.UTF-8, C.utf8 or UTF-8 by default
- always uses C.UTF-8 on Android
- uses `surrogateescape` on stdin and stdout in the coercion
target locales
- configure option to disable locale coercion at build time
- configure option to disable C locale warning at build time
Diffstat (limited to 'Lib/test/test_c_locale_coercion.py')
-rw-r--r-- | Lib/test/test_c_locale_coercion.py | 262 |
1 files changed, 262 insertions, 0 deletions
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale

import unittest
import os
import sys
import sysconfig
import shutil
import subprocess
from collections import namedtuple

import test.support
from test.support.script_helper import (
    run_python_until_end,
    interpreter_requires_environment,
)

# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")


# There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work
# is to try them with locale.setlocale(). We do that in a subprocess
# to avoid altering the locale of the test runner.
def _set_locale_in_subprocess(locale_name):
    """Return True if a child interpreter can set LC_CTYPE to *locale_name*.

    The child calls locale.setlocale(); a zero exit status is treated as
    success.
    """
    cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))"
    cmd = cmd_fmt.format(locale_name)
    result, _ = run_python_until_end("-c", cmd, __isolated=True)
    return result.rc == 0


_EncodingDetails = namedtuple("EncodingDetails",
                              "fsencoding stdin_info stdout_info stderr_info")


class EncodingDetails(_EncodingDetails):
    """Filesystem encoding and "encoding:errors" info for the std streams."""

    # One-liner run in the child process: prints one line per tuple field,
    # in field order.
    CHILD_PROCESS_SCRIPT = ";".join([
        "import sys",
        "print(sys.getfilesystemencoding())",
        "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
        "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
        "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
    ])

    @classmethod
    def get_expected_details(cls, expected_fsencoding):
        """Returns expected child process details for a given encoding

        The result is a plain dict keyed by the EncodingDetails field names.
        """
        _stream = expected_fsencoding + ":{}"
        # stdin and stdout should use surrogateescape either because the
        # coercion triggered, or because the C locale was detected
        stream_info = 2*[_stream.format("surrogateescape")]
        # stderr should always use backslashreplace
        stream_info.append(_stream.format("backslashreplace"))
        return dict(cls(expected_fsencoding, *stream_info)._asdict())

    @staticmethod
    def _handle_output_variations(data):
        """Adjust the output to handle platform specific idiosyncrasies

        * Some platforms report ASCII as ANSI_X3.4-1968
        * Some platforms report ASCII as US-ASCII
        * Some platforms report UTF-8 instead of utf-8
        """
        data = data.replace(b"ANSI_X3.4-1968", b"ascii")
        data = data.replace(b"US-ASCII", b"ascii")
        data = data.lower()
        return data

    @classmethod
    def get_child_details(cls, env_vars):
        """Retrieves fsencoding and standard stream details from a child process

        Returns (encoding_details, stderr_lines):

        - encoding_details: dict of the EncodingDetails fields reported by
          the child (already decoded and normalised)
        - stderr_lines: result of calling splitlines() on the stderr output

        The child is run in isolated mode if the current interpreter supports
        that.
        """
        result, py_cmd = run_python_until_end(
            "-c", cls.CHILD_PROCESS_SCRIPT,
            __isolated=True,
            **env_vars
        )
        if result.rc != 0:
            result.fail(py_cmd)
        # All subprocess outputs in this test case should be pure ASCII
        adjusted_output = cls._handle_output_variations(result.out)
        stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()
        child_encoding_details = dict(cls(*stdout_lines)._asdict())
        stderr_lines = result.err.decode("ascii").rstrip().splitlines()
        return child_encoding_details, stderr_lines


class _ChildProcessEncodingTestCase(unittest.TestCase):
    # Base class to check for expected encoding details in a child process

    def _check_child_encoding_details(self,
                                      env_vars,
                                      expected_fsencoding,
                                      expected_warning):
        """Check the C locale handling for the given process environment

        Parameters:
            env_vars: dict of environment variables passed to the child
            expected_fsencoding: the encoding the child is expected to report
            expected_warning: list of the lines expected on the child's stderr
        """
        result = EncodingDetails.get_child_details(env_vars)
        encoding_details, stderr_lines = result
        self.assertEqual(encoding_details,
                         EncodingDetails.get_expected_details(
                             expected_fsencoding))
        self.assertEqual(stderr_lines, expected_warning)


# Details of the shared library warning emitted at runtime
LIBRARY_C_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
    "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
    "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
    "locales is recommended."
)


@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
                     "C locale runtime warning disabled at build time")
class LocaleWarningTests(_ChildProcessEncodingTestCase):
    # Test warning emitted when running in the C locale

    def test_library_c_locale_warning(self):
        self.maxDiff = None
        for locale_to_set in ("C", "POSIX", "invalid.ascii"):
            var_dict = {
                "LC_ALL": locale_to_set
            }
            with self.subTest(forced_locale=locale_to_set):
                self._check_child_encoding_details(var_dict,
                                                   "ascii",
                                                   [LIBRARY_C_LOCALE_WARNING])


# Details of the CLI locale coercion warning emitted at runtime
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
    "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)."
)


class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):
    # Base class for test cases that rely on coercion targets being defined

    available_targets = []
    targets_required = True

    @classmethod
    def setUpClass(cls):
        """Probe the coercion target locales and record the expected warning.

        Sets cls.available_targets and cls.EXPECTED_COERCION_WARNING. Raises
        unittest.SkipTest when targets are required but none is available.
        """
        # Rebind a fresh list on the class being set up. Appending to the
        # inherited list would mutate the base class attribute, so each
        # subclass would add the same locales again to one shared list.
        available_targets = cls.available_targets = []
        # Find the target locales available in the current system
        for target_locale in _C_UTF8_LOCALES:
            if _set_locale_in_subprocess(target_locale):
                available_targets.append(target_locale)
        if cls.targets_required and not available_targets:
            raise unittest.SkipTest("No C-with-UTF-8 locale available")
        # Expect coercion to use the first available locale
        first_target_locale = None
        if available_targets:
            first_target_locale = available_targets[0]
        warning_msg = CLI_COERCION_WARNING_FMT.format(first_target_locale)
        cls.EXPECTED_COERCION_WARNING = warning_msg


class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
    # Test explicit external configuration via the process environment

    def test_external_target_locale_configuration(self):
        # Explicitly setting a target locale should give the same behaviour as
        # is seen when implicitly coercing to that target locale
        self.maxDiff = None

        expected_warning = []
        expected_fsencoding = "utf-8"

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
        }
        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in self.available_targets:
                with self.subTest(env_var=env_var,
                                  configured_locale=locale_to_set):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    self._check_child_encoding_details(var_dict,
                                                       expected_fsencoding,
                                                       expected_warning)


@test.support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
                     "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
    # Test implicit reconfiguration of the environment during CLI startup

    def _check_c_locale_coercion(self, expected_fsencoding, coerce_c_locale):
        """Check the C locale handling for various configurations

        Parameters:
            expected_fsencoding: the encoding the child is expected to report
            coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
              None: don't set the variable at all
              str: the value set in the child's environment
        """

        # Check for expected warning on stderr if C locale is coerced
        self.maxDiff = None

        expected_warning = []
        if coerce_c_locale != "0":
            expected_warning.append(self.EXPECTED_COERCION_WARNING)

        base_var_dict = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
        }
        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
                with self.subTest(env_var=env_var,
                                  nominal_locale=locale_to_set,
                                  PYTHONCOERCECLOCALE=coerce_c_locale):
                    var_dict = base_var_dict.copy()
                    var_dict[env_var] = locale_to_set
                    if coerce_c_locale is not None:
                        var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
                    self._check_child_encoding_details(var_dict,
                                                       expected_fsencoding,
                                                       expected_warning)

    def test_test_PYTHONCOERCECLOCALE_not_set(self):
        # This should coerce to the first available target locale by default
        self._check_c_locale_coercion("utf-8", coerce_c_locale=None)

    def test_PYTHONCOERCECLOCALE_not_zero(self):
        # *Any* string other than "0" is considered "set" for our purposes
        # and hence should result in the locale coercion being enabled
        for setting in ("", "1", "true", "false"):
            self._check_c_locale_coercion("utf-8", coerce_c_locale=setting)

    def test_PYTHONCOERCECLOCALE_set_to_zero(self):
        # The setting "0" should result in the locale coercion being disabled
        self._check_c_locale_coercion("ascii", coerce_c_locale="0")


def test_main():
    """Run the three test classes, then reap any leftover child processes."""
    test.support.run_unittest(
        LocaleConfigurationTests,
        LocaleCoercionTests,
        LocaleWarningTests
    )
    test.support.reap_children()


if __name__ == "__main__":
    test_main()