diff options
author | Victor Stinner <vstinner@redhat.com> | 2019-03-27 17:28:46 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-27 17:28:46 (GMT) |
commit | d929f1838a8fba881ff0148b7fc31f6265703e3d (patch) | |
tree | 36ff97834b250c4412d5a95c2206c8ba9d5cf13e | |
parent | 4a9a505d6f2474a570422dad89f8d1b344d6cd36 (diff) | |
download | cpython-d929f1838a8fba881ff0148b7fc31f6265703e3d.zip cpython-d929f1838a8fba881ff0148b7fc31f6265703e3d.tar.gz cpython-d929f1838a8fba881ff0148b7fc31f6265703e3d.tar.bz2 |
bpo-36443: Disable C locale coercion and UTF-8 Mode by default (GH-12589)
bpo-36443, bpo-36202: Since Python 3.7.0, calling Py_DecodeLocale()
before Py_Initialize() produces mojibake if the LC_CTYPE locale is
coerced and/or if the UTF-8 Mode is enabled by the user
configuration. This change fixes the issue by disabling LC_CTYPE
coercion and UTF-8 Mode by default. They must now be enabled
explicitly (opt-in) using the new _Py_PreInitialize() API with
_PyPreConfig.
When embedding Python, set coerce_c_locale and utf8_mode attributes
of _PyPreConfig to -1 to automatically enable these parameters
depending on the LC_CTYPE locale, environment variables and command
line arguments.
Alternative: Setting Py_UTF8Mode to 1 always explicitly enables the
UTF-8 Mode.
Changes:
* _PyPreConfig_INIT now sets coerce_c_locale and utf8_mode to 0 by
default.
* _Py_InitializeFromArgs() and _Py_InitializeFromWideArgs() can now
be called with config=NULL.
-rw-r--r-- | Include/cpython/coreconfig.h | 27 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 5 | ||||
-rw-r--r-- | Misc/NEWS.d/next/C API/2019-03-27-15-58-23.bpo-36443.tAfZR9.rst | 6 | ||||
-rw-r--r-- | Modules/main.c | 19 | ||||
-rw-r--r-- | Programs/_testembed.c | 21 | ||||
-rw-r--r-- | Python/preconfig.c | 4 | ||||
-rw-r--r-- | Python/pylifecycle.c | 22 |
7 files changed, 58 insertions, 46 deletions
diff --git a/Include/cpython/coreconfig.h b/Include/cpython/coreconfig.h index 27ee1f4..7ce1a02 100644 --- a/Include/cpython/coreconfig.h +++ b/Include/cpython/coreconfig.h @@ -63,13 +63,20 @@ typedef struct { set to !Py_IgnoreEnvironmentFlag. */ int use_environment; - /* PYTHONCOERCECLOCALE, -1 means unknown. + /* Coerce the LC_CTYPE locale if it's equal to "C"? (PEP 538) + + Set to 0 by PYTHONCOERCECLOCALE=0. Set to 1 by PYTHONCOERCECLOCALE=1. + Set to 2 if the user preferred LC_CTYPE locale is "C". If it is equal to 1, LC_CTYPE locale is read to decide it it should be coerced or not (ex: PYTHONCOERCECLOCALE=1). Internally, it is set to 2 if the LC_CTYPE locale must be coerced. */ int coerce_c_locale; - int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ + + /* Emit a warning if the LC_CTYPE locale is coerced? + + Disabled by default. Set to 1 by PYTHONCOERCECLOCALE=warn. */ + int coerce_c_locale_warn; #ifdef MS_WINDOWS /* If greater than 1, use the "mbcs" encoding instead of the UTF-8 @@ -83,9 +90,17 @@ typedef struct { int legacy_windows_fs_encoding; #endif - /* Enable UTF-8 mode? - Set by -X utf8 command line option and PYTHONUTF8 environment variable. - If set to -1 (default), inherit Py_UTF8Mode value. */ + /* Enable UTF-8 mode? (PEP 540) + + Disabled by default (equals to 0). + + Set to 1 by "-X utf8" and "-X utf8=1" command line options. + Set to 1 by PYTHONUTF8=1 environment variable. + + Set to 0 by "-X utf8=0" and PYTHONUTF8=0. + + If equals to -1, it is set to 1 if the LC_CTYPE locale is "C" or + "POSIX", otherwise inherit Py_UTF8Mode value. */ int utf8_mode; int dev_mode; /* Development mode. 
PYTHONDEVMODE, -X dev */ @@ -104,8 +119,6 @@ typedef struct { _PyPreConfig_WINDOWS_INIT \ .isolated = -1, \ .use_environment = -1, \ - .coerce_c_locale = -1, \ - .utf8_mode = -1, \ .dev_mode = -1, \ .allocator = NULL} diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index c63ea5a..164527a 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -494,8 +494,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): if key not in expected_preconfig: expected_preconfig[key] = expected_config[key] - self.check_core_config(config, expected_config) self.check_pre_config(config, expected_preconfig) + self.check_core_config(config, expected_config) self.check_global_config(config) def test_init_default_config(self): @@ -573,7 +573,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): INIT_ENV_PRECONFIG = { 'allocator': 'malloc', - 'utf8_mode': 1, } INIT_ENV_CONFIG = { 'use_hash_seed': 1, @@ -581,8 +580,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'tracemalloc': 2, 'import_time': 1, 'malloc_stats': 1, - 'filesystem_encoding': 'utf-8', - 'filesystem_errors': UTF8_MODE_ERRORS, 'inspect': 1, 'optimization_level': 2, 'pycache_prefix': 'env_pycache_prefix', diff --git a/Misc/NEWS.d/next/C API/2019-03-27-15-58-23.bpo-36443.tAfZR9.rst b/Misc/NEWS.d/next/C API/2019-03-27-15-58-23.bpo-36443.tAfZR9.rst new file mode 100644 index 0000000..3d98c31 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2019-03-27-15-58-23.bpo-36443.tAfZR9.rst @@ -0,0 +1,6 @@ +Since Python 3.7.0, calling :c:func:`Py_DecodeLocale` before +:c:func:`Py_Initialize` produces mojibake if the ``LC_CTYPE`` locale is coerced +and/or if the UTF-8 Mode is enabled by the user configuration. The LC_CTYPE +coercion and UTF-8 Mode are now disabled by default to fix the mojibake issue. +They must now be enabled explicitly (opt-in) using the new +:c:func:`_Py_PreInitialize` API with ``_PyPreConfig``. 
diff --git a/Modules/main.c b/Modules/main.c index ff79edb..7665769 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -52,23 +52,30 @@ pymain_init(const _PyArgv *args) fedisableexcept(FE_OVERFLOW); #endif - _PyCoreConfig config = _PyCoreConfig_INIT; - + _PyPreConfig preconfig = _PyPreConfig_INIT; + /* Set to -1 to enable them depending on the LC_CTYPE locale and the + environment variables (PYTHONUTF8 and PYTHONCOERCECLOCALE) */ + preconfig.coerce_c_locale = -1; + preconfig.utf8_mode = -1; if (args->use_bytes_argv) { - err = _Py_PreInitializeFromArgs(NULL, args->argc, args->bytes_argv); + err = _Py_PreInitializeFromArgs(&preconfig, + args->argc, args->bytes_argv); } else { - err = _Py_PreInitializeFromWideArgs(NULL, args->argc, args->wchar_argv); + err = _Py_PreInitializeFromWideArgs(&preconfig, + args->argc, args->wchar_argv); } if (_Py_INIT_FAILED(err)) { return err; } + /* pass NULL as the config: config is read from command line arguments, + environment variables, configuration files */ if (args->use_bytes_argv) { - return _Py_InitializeFromArgs(&config, args->argc, args->bytes_argv); + return _Py_InitializeFromArgs(NULL, args->argc, args->bytes_argv); } else { - return _Py_InitializeFromWideArgs(&config, args->argc, args->wchar_argv); + return _Py_InitializeFromWideArgs(NULL, args->argc, args->wchar_argv); } } diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 425954c..d8e12cf 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -441,8 +441,6 @@ static int test_init_from_config(void) putenv("PYTHONMALLOCSTATS=0"); config.malloc_stats = 1; - /* FIXME: test coerce_c_locale and coerce_c_locale_warn */ - putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix"); config.pycache_prefix = L"conf_pycache_prefix"; @@ -617,17 +615,6 @@ static int test_init_isolated(void) { _PyInitError err; - _PyPreConfig preconfig = _PyPreConfig_INIT; - - /* Set coerce_c_locale and utf8_mode to not depend on the locale */ - preconfig.coerce_c_locale = 0; - 
preconfig.utf8_mode = 0; - - err = _Py_PreInitialize(&preconfig); - if (_Py_INIT_FAILED(err)) { - _Py_ExitInitError(err); - } - /* Test _PyCoreConfig.isolated=1 */ _PyCoreConfig config = _PyCoreConfig_INIT; @@ -654,10 +641,6 @@ static int test_preinit_isolated1(void) _PyInitError err; _PyPreConfig preconfig = _PyPreConfig_INIT; - - /* Set coerce_c_locale and utf8_mode to not depend on the locale */ - preconfig.coerce_c_locale = 0; - preconfig.utf8_mode = 0; preconfig.isolated = 1; err = _Py_PreInitialize(&preconfig); @@ -685,10 +668,6 @@ static int test_preinit_isolated2(void) _PyInitError err; _PyPreConfig preconfig = _PyPreConfig_INIT; - - /* Set coerce_c_locale and utf8_mode to not depend on the locale */ - preconfig.coerce_c_locale = 0; - preconfig.utf8_mode = 0; preconfig.isolated = 0; err = _Py_PreInitialize(&preconfig); diff --git a/Python/preconfig.c b/Python/preconfig.c index 011ed53..7ac645d 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -386,7 +386,9 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config) #ifdef MS_WINDOWS COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); #endif - COPY_FLAG(utf8_mode, Py_UTF8Mode); + if (Py_UTF8Mode > 0) { + config->utf8_mode = 1; + } #undef COPY_FLAG #undef COPY_NOT_FLAG diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 7c6948e..ad14472 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -485,7 +485,7 @@ _Py_Initialize_ReconfigureCore(PyInterpreterState **interp_p, _PyCoreConfig_Write(core_config); if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) { - return _Py_INIT_ERR("failed to copy core config"); + return _Py_INIT_NO_MEMORY(); } core_config = &interp->core_config; @@ -548,7 +548,7 @@ pycore_create_interpreter(const _PyCoreConfig *core_config, *interp_p = interp; if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) { - return _Py_INIT_ERR("failed to copy core config"); + return _Py_INIT_NO_MEMORY(); } core_config = &interp->core_config; 
@@ -785,6 +785,7 @@ _Py_PreInitialize(const _PyPreConfig *src_config) _PyInitError _Py_PreInitializeFromCoreConfig(const _PyCoreConfig *coreconfig) { + assert(coreconfig != NULL); _PyPreConfig config = _PyPreConfig_INIT; _PyCoreConfig_GetCoreConfig(&config, coreconfig); return _Py_PreInitialize(&config); @@ -799,8 +800,10 @@ pyinit_coreconfig(_PyCoreConfig *config, const _PyArgv *args, PyInterpreterState **interp_p) { - if (_PyCoreConfig_Copy(config, src_config) < 0) { - return _Py_INIT_ERR("failed to copy core config"); + if (src_config) { + if (_PyCoreConfig_Copy(config, src_config) < 0) { + return _Py_INIT_NO_MEMORY(); + } } _PyInitError err = _PyCoreConfig_Read(config, args); @@ -839,9 +842,14 @@ _Py_InitializeCore(const _PyCoreConfig *src_config, const _PyArgv *args, PyInterpreterState **interp_p) { - assert(src_config != NULL); + _PyInitError err; - _PyInitError err = _Py_PreInitializeFromCoreConfig(src_config); + if (src_config) { + err = _Py_PreInitializeFromCoreConfig(src_config); + } + else { + err = _Py_PreInitialize(NULL); + } if (_Py_INIT_FAILED(err)) { return err; } @@ -1395,7 +1403,7 @@ new_interpreter(PyThreadState **tstate_p) } if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) { - return _Py_INIT_ERR("failed to copy core config"); + return _Py_INIT_NO_MEMORY(); } core_config = &interp->core_config; |