From dbdee0073cf0b88fe541980ace1f650900f455cc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 17 Sep 2018 17:19:26 -0700 Subject: bpo-34589: Add -X coerce_c_locale command line option (GH-9378) Add a new -X coerce_c_locale command line option to control C locale coercion (PEP 538). --- Doc/using/cmdline.rst | 16 ++- Doc/whatsnew/3.7.rst | 7 ++ Lib/test/test_c_locale_coercion.py | 55 ++++++++-- Lib/test/test_cmd_line.py | 7 +- Lib/test/test_sys.py | 8 +- Lib/test/test_utf8_mode.py | 3 +- .../2018-09-18-01-41-33.bpo-34589.lLVTYc.rst | 2 + Python/coreconfig.c | 114 ++++++++++++++------- 8 files changed, 160 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index b61df8a..cd3b241 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -438,13 +438,22 @@ Miscellaneous options * Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to ``True`` - * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding + * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8 mode (even when it would otherwise activate automatically). See :envvar:`PYTHONUTF8` for more details. * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. + * ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C + locale (:pep:`538`). + ``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX + locales to a more capable UTF-8 based alternative. 
+ ``-X coerce_c_locale=warn`` will cause Python to emit warning messages on + ``stderr`` if either the locale coercion activates, or else if a locale + that *would* have triggered coercion is still active when the Python + runtime is initialized. + See :envvar:`PYTHONCOERCECLOCALE` for more details. It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -464,6 +473,9 @@ Miscellaneous options .. versionadded:: 3.7 The ``-X importtime``, ``-X dev`` and ``-X utf8`` options. + .. versionadded:: 3.7.1 + The ``-X coerce_c_locale`` option. + .. versionadded:: 3.8 The ``-X pycache_prefix`` option. @@ -850,6 +862,8 @@ conflict. order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for system interfaces. + Also available as the :option:`-X` ``coerce_c_locale`` option. + Availability: \*nix .. versionadded:: 3.7 diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index f53a026..6cd9d46 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables, while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before calling :c:func:`Py_Initialize`. + +:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale +coercion (:pep:`538`) anymore: it is always disabled. It can now only be +enabled by the Python program ("python3"). + +New :option:`-X` ``coerce_c_locale`` command line option to control C locale +coercion (:pep:`538`). 
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 1db293b..f62208a 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -139,7 +139,7 @@ class EncodingDetails(_EncodingDetails): return data @classmethod - def get_child_details(cls, env_vars): + def get_child_details(cls, env_vars, xoption=None): """Retrieves fsencoding and standard stream details from a child process Returns (encoding_details, stderr_lines): @@ -150,10 +150,11 @@ class EncodingDetails(_EncodingDetails): The child is run in isolated mode if the current interpreter supports that. """ - result, py_cmd = run_python_until_end( - "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, - **env_vars - ) + args = [] + if xoption: + args.extend(("-X", f"coerce_c_locale={xoption}")) + args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT)) + result, py_cmd = run_python_until_end(*args, **env_vars) if not result.rc == 0: result.fail(py_cmd) # All subprocess outputs in this test case should be pure ASCII @@ -212,7 +213,8 @@ class _LocaleHandlingTestCase(unittest.TestCase): expected_fs_encoding, expected_stream_encoding, expected_warnings, - coercion_expected): + coercion_expected, + xoption=None): """Check the C locale handling for the given process environment Parameters: @@ -220,7 +222,7 @@ class _LocaleHandlingTestCase(unittest.TestCase): expected_stream_encoding: expected encoding for standard streams expected_warning: stderr output to expect (if any) """ - result = EncodingDetails.get_child_details(env_vars) + result = EncodingDetails.get_child_details(env_vars, xoption) encoding_details, stderr_lines = result expected_details = EncodingDetails.get_expected_details( coercion_expected, @@ -290,6 +292,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): coerce_c_locale, expected_warnings=None, coercion_expected=True, + use_xoption=False, **extra_vars): """Check the C locale handling for various configurations @@ -319,8 +322,12 
@@ class LocaleCoercionTests(_LocaleHandlingTestCase): "PYTHONCOERCECLOCALE": "", } base_var_dict.update(extra_vars) + xoption = None if coerce_c_locale is not None: - base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale + if use_xoption: + xoption = coerce_c_locale + else: + base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale # Check behaviour for the default locale with self.subTest(default_locale=True, @@ -342,7 +349,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): fs_encoding, stream_encoding, _expected_warnings, - _coercion_expected) + _coercion_expected, + xoption=xoption) # Check behaviour for explicitly configured locales for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS: @@ -357,7 +365,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): fs_encoding, stream_encoding, expected_warnings, - coercion_expected) + coercion_expected, + xoption=xoption) def test_PYTHONCOERCECLOCALE_not_set(self): # This should coerce to the first available target locale by default @@ -404,6 +413,32 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): expected_warnings=[LEGACY_LOCALE_WARNING], coercion_expected=False) + def test_xoption_set_to_1(self): + self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1", + use_xoption=True) + + def test_xoption_set_to_zero(self): + # The setting "0" should result in the locale coercion being disabled + self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, + EXPECTED_C_LOCALE_STREAM_ENCODING, + coerce_c_locale="0", + coercion_expected=False, + use_xoption=True) + # Setting LC_ALL=C shouldn't make any difference to the behaviour + self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, + EXPECTED_C_LOCALE_STREAM_ENCODING, + coerce_c_locale="0", + LC_ALL="C", + coercion_expected=False, + use_xoption=True) + + def test_xoption_set_to_warn(self): + # -X coerce_c_locale=warn enables runtime warnings for legacy locales + self._check_c_locale_coercion("utf-8", "utf-8", + coerce_c_locale="warn", + 
expected_warnings=[CLI_COERCION_WARNING], + use_xoption=True) + def test_main(): test.support.run_unittest( LocaleConfigurationTests, diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 21511b8..7e967b2 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -159,13 +159,16 @@ class CmdLineTest(unittest.TestCase): env = os.environ.copy() # Use C locale to get ascii for the locale encoding env['LC_ALL'] = 'C' - env['PYTHONCOERCECLOCALE'] = '0' code = ( b'import locale; ' b'print(ascii("' + undecodable + b'"), ' b'locale.getpreferredencoding())') p = subprocess.Popen( - [sys.executable, "-c", code], + [sys.executable, + # Disable C locale coercion and UTF-8 Mode to not use UTF-8 + "-X", "coerce_c_locale=0", + "-X", "utf8=0", + "-c", code], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env) stdout, stderr = p.communicate() diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index b90366d..a7f2928 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -656,9 +656,8 @@ class SysModuleTest(unittest.TestCase): def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): # Force the POSIX locale - env = os.environ.copy() + env = dict(os.environ) env["LC_ALL"] = locale - env["PYTHONCOERCECLOCALE"] = "0" code = '\n'.join(( 'import sys', 'def dump(name):', @@ -668,7 +667,10 @@ class SysModuleTest(unittest.TestCase): 'dump("stdout")', 'dump("stderr")', )) - args = [sys.executable, "-X", "utf8=0", "-c", code] + args = [sys.executable, + "-X", "utf8=0", + "-X", "coerce_c_locale=0", + "-c", code] if isolated: args.append("-I") if encoding is not None: diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 7280ce7..c3cbb49 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -27,6 +27,8 @@ class UTF8ModeTests(unittest.TestCase): return (loc in POSIX_LOCALES) def get_output(self, *args, failure=False, **kw): + # Always disable the C locale coercion (PEP 538) + 
args = ('-X', 'coerce_c_locale=0', *args) kw = dict(self.DEFAULT_ENV, **kw) if failure: out = assert_python_failure(*args, **kw) @@ -116,7 +118,6 @@ class UTF8ModeTests(unittest.TestCase): # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode # and has the priority over -X utf8 and PYTHONUTF8 out = self.get_output('-X', 'utf8', '-c', code, - PYTHONUTF8='strict', PYTHONLEGACYWINDOWSFSENCODING='1') self.assertEqual(out, 'mbcs/replace') diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst new file mode 100644 index 0000000..618092d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-18-01-41-33.bpo-34589.lLVTYc.rst @@ -0,0 +1,2 @@ +Add a new :option:`-X` ``coerce_c_locale`` command line option to control C +locale coercion (:pep:`538`). diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 131a043..b2459dc 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config) return _Py_INIT_OK(); } +#ifndef MS_WINDOWS + /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + if (ctype_loc != NULL + && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) + { + config->utf8_mode = 1; + return _Py_INIT_OK(); + } +#endif + return _Py_INIT_OK(); } @@ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config) config->malloc_stats = 1; } - const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); - if (env) { - if (strcmp(env, "0") == 0) { - if (config->_coerce_c_locale < 0) { - config->_coerce_c_locale = 0; - } - } - else if (strcmp(env, "warn") == 0) { - if (config->_coerce_c_locale_warn < 0) { - config->_coerce_c_locale_warn = 1; - } - } - else { - if (config->_coerce_c_locale < 0) { - config->_coerce_c_locale = 1; - } - } - } - wchar_t *path; int res = _PyCoreConfig_GetEnvDup(config, 
&path, L"PYTHONPATH", "PYTHONPATH"); @@ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config) } -static void -config_init_locale(_PyCoreConfig *config) +static _PyInitError +config_init_coerce_c_locale(_PyCoreConfig *config) { + const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale"); + if (xopt) { + wchar_t *sep = wcschr(xopt, L'='); + if (sep) { + xopt = sep + 1; + if (wcscmp(xopt, L"1") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + else if (wcscmp(xopt, L"0") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 0; + } + } + else if (wcscmp(xopt, L"warn") == 0) { + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 1; + } + } + else { + return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value"); + } + } + else { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 0; + } + } + + const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); + if (env) { + if (strcmp(env, "0") == 0) { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 0; + } + } + else if (strcmp(env, "warn") == 0) { + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 1; + } + } + else { + if (config->_coerce_c_locale < 0) { + config->_coerce_c_locale = 1; + } + } + + if (config->_coerce_c_locale_warn < 0) { + config->_coerce_c_locale_warn = 0; + } + } + if (config->_coerce_c_locale < 0) { /* The C locale enables the C locale coercion (PEP 538) */ if (_Py_LegacyLocaleDetected()) { config->_coerce_c_locale = 1; + return _Py_INIT_OK(); } } -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - 
config->utf8_mode = 1; - } - } -#endif + return _Py_INIT_OK(); } @@ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config) } } - if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) { - config_init_locale(config); + if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) { + err = config_init_coerce_c_locale(config); + if (_Py_INIT_FAILED(err)) { + return err; + } } if (config->_install_importlib) { @@ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config) } assert(config->_coerce_c_locale >= 0); + assert(config->_coerce_c_locale_warn >= 0); assert(config->use_environment >= 0); assert(config->filesystem_encoding != NULL); assert(config->filesystem_errors != NULL); -- cgit v0.12