diff options
author | Victor Stinner <vstinner@redhat.com> | 2019-03-05 11:32:09 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-05 11:32:09 (GMT) |
commit | 5a02e0d1c8a526fc4e80a2fb8b4a9d5bc64c7d82 (patch) | |
tree | d7887a24b69199e50168fda98da920b16dc3197d /Python/coreconfig.c | |
parent | 5b10b9824780b2181158902067912ee9e7b04657 (diff) | |
download | cpython-5a02e0d1c8a526fc4e80a2fb8b4a9d5bc64c7d82.zip cpython-5a02e0d1c8a526fc4e80a2fb8b4a9d5bc64c7d82.tar.gz cpython-5a02e0d1c8a526fc4e80a2fb8b4a9d5bc64c7d82.tar.bz2 |
bpo-36142: Add _PyPreConfig.utf8_mode (GH-12174)
* Move following fields from _PyCoreConfig to _PyPreConfig:
* coerce_c_locale
* coerce_c_locale_warn
* legacy_windows_stdio
* utf8_mode
* _PyPreConfig_ReadFromArgv() is now responsible to choose the
filesystem encoding
* _PyPreConfig_Write() now sets the LC_CTYPE locale
Diffstat (limited to 'Python/coreconfig.c')
-rw-r--r-- | Python/coreconfig.c | 354 |
1 files changed, 20 insertions, 334 deletions
diff --git a/Python/coreconfig.c b/Python/coreconfig.c index a6aa89b..e372de4 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -531,10 +531,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(dump_refs); COPY_ATTR(malloc_stats); - COPY_ATTR(coerce_c_locale); - COPY_ATTR(coerce_c_locale_warn); - COPY_ATTR(utf8_mode); - COPY_WSTR_ATTR(pycache_prefix); COPY_WSTR_ATTR(module_search_path_env); COPY_WSTR_ATTR(home); @@ -571,7 +567,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_STR_ATTR(stdio_encoding); COPY_STR_ATTR(stdio_errors); #ifdef MS_WINDOWS - COPY_ATTR(legacy_windows_fs_encoding); COPY_ATTR(legacy_windows_stdio); #endif COPY_ATTR(skip_source_first_line); @@ -592,19 +587,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) const char* _PyCoreConfig_GetEnv(const _PyCoreConfig *config, const char *name) { - assert(config->preconfig.use_environment >= 0); - - if (!config->preconfig.use_environment) { - return NULL; - } - - const char *var = getenv(name); - if (var && var[0] != '\0') { - return var; - } - else { - return NULL; - } + return _PyPreConfig_GetEnv(&config->preconfig, name); } @@ -670,7 +653,6 @@ _PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config) config->ATTR = !(VALUE); \ } - COPY_FLAG(utf8_mode, Py_UTF8Mode); COPY_FLAG(bytes_warning, Py_BytesWarningFlag); COPY_FLAG(inspect, Py_InspectFlag); COPY_FLAG(interactive, Py_InteractiveFlag); @@ -679,7 +661,6 @@ _PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config) COPY_FLAG(verbose, Py_VerboseFlag); COPY_FLAG(quiet, Py_QuietFlag); #ifdef MS_WINDOWS - COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag); #endif COPY_FLAG(_frozen, Py_FrozenFlag); @@ -709,7 +690,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config) VAR = !config->ATTR; \ } - COPY_FLAG(utf8_mode, Py_UTF8Mode); COPY_FLAG(bytes_warning, Py_BytesWarningFlag); COPY_FLAG(inspect, Py_InspectFlag); COPY_FLAG(interactive, Py_InteractiveFlag); @@ -718,7 +698,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config) COPY_FLAG(verbose, Py_VerboseFlag); COPY_FLAG(quiet, Py_QuietFlag); #ifdef MS_WINDOWS - COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag); #endif COPY_FLAG(_frozen, Py_FrozenFlag); @@ -838,23 +817,7 @@ config_init_executable(_PyCoreConfig *config) static const wchar_t* config_get_xoption(const _PyCoreConfig *config, wchar_t *name) { - int nxoption = config->nxoption; - wchar_t **xoptions = config->xoptions; - for (int i=0; i < nxoption; i++) { - wchar_t *option = xoptions[i]; - size_t len; - wchar_t *sep = wcschr(option, L'='); - if (sep != NULL) { - len = (sep - option); - } - else { - len = wcslen(option); - } - if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') { - return option; - } - } - return NULL; + return _Py_get_xoption(config->nxoption, config->xoptions, name); } @@ -915,67 +878,6 @@ config_init_hash_seed(_PyCoreConfig *config) } -static _PyInitError -config_init_utf8_mode(_PyCoreConfig *config) -{ - const wchar_t *xopt = config_get_xoption(config, L"utf8"); - if (xopt) { - wchar_t *sep = wcschr(xopt, L'='); - if (sep) { - xopt = sep + 1; - if (wcscmp(xopt, L"1") == 0) { - config->utf8_mode = 1; - } - else if (wcscmp(xopt, L"0") == 0) { - config->utf8_mode = 0; - } - else { - return _Py_INIT_USER_ERR("invalid -X utf8 option value"); - } - } - else { - config->utf8_mode = 1; - } - return _Py_INIT_OK(); - } - - const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONUTF8"); - if (opt) { - if (strcmp(opt, "1") == 0) { - config->utf8_mode = 1; - } - else if (strcmp(opt, "0") == 0) { - config->utf8_mode = 0; - } - else { - return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment " - "variable value"); - } - return _Py_INIT_OK(); - } - - return _Py_INIT_OK(); -} - - -static int -config_str_to_int(const char *str, int *result) -{ - const char *endptr = str; - errno = 0; - long value = strtol(str, (char **)&endptr, 10); - if (*endptr != '\0' || errno == ERANGE) { - return -1; - } - if (value < INT_MIN || value > INT_MAX) { - return -1; - } - - *result = (int)value; - return 0; -} - - static int config_wstr_to_int(const wchar_t *wstr, int *result) { @@ -994,27 +896,12 @@ config_wstr_to_int(const wchar_t *wstr, int *result) } -static void -get_env_flag(_PyCoreConfig *config, int *flag, const char *name) -{ - const char *var = _PyCoreConfig_GetEnv(config, name); - if (!var) { - return; - } - int value; - if (config_str_to_int(var, &value) < 0 || value < 0) { - /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */ - value = 1; - } - if (*flag < value) { - *flag = value; - } -} - - static _PyInitError config_read_env_vars(_PyCoreConfig *config) { +#define get_env_flag(CONFIG, ATTR, NAME) \ + _Py_get_env_flag(&(CONFIG)->preconfig, (ATTR), (NAME)) + /* Get environment variables */ get_env_flag(config, &config->parser_debug, "PYTHONDEBUG"); get_env_flag(config, &config->verbose, "PYTHONVERBOSE"); @@ -1040,8 +927,6 @@ config_read_env_vars(_PyCoreConfig *config) } #ifdef MS_WINDOWS - get_env_flag(config, &config->legacy_windows_fs_encoding, - "PYTHONLEGACYWINDOWSFSENCODING"); get_env_flag(config, &config->legacy_windows_stdio, "PYTHONLEGACYWINDOWSSTDIO"); #endif @@ -1057,23 +942,6 @@ config_read_env_vars(_PyCoreConfig *config) config->malloc_stats = 1; } - const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); - if (env) { - if (strcmp(env, "0") == 0) { - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 0; - } - } - else if (strcmp(env, "warn") == 0) { - config->coerce_c_locale_warn = 1; - } - else { - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 1; - } - } - } - wchar_t *path; int res = _PyCoreConfig_GetEnvDup(config, &path, L"PYTHONPATH", "PYTHONPATH"); @@ -1090,6 +958,8 @@ config_read_env_vars(_PyCoreConfig *config) } return _Py_INIT_OK(); + +#undef get_env_flag } @@ -1101,7 +971,7 @@ config_init_tracemalloc(_PyCoreConfig *config) const char *env = _PyCoreConfig_GetEnv(config, "PYTHONTRACEMALLOC"); if (env) { - if (!config_str_to_int(env, &nframe)) { + if (!_Py_str_to_int(env, &nframe)) { valid = (nframe >= 0); } else { @@ -1213,37 +1083,6 @@ config_read_complex_options(_PyCoreConfig *config) } -static void -config_init_locale(_PyCoreConfig *config) -{ - /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't - imply that the C locale is always coerced. It is only coerced if - if the LC_CTYPE locale is "C". */ - if (config->coerce_c_locale != 0) { - /* The C locale enables the C locale coercion (PEP 538) */ - if (_Py_LegacyLocaleDetected()) { - config->coerce_c_locale = 1; - } - else { - config->coerce_c_locale = 0; - } - } - -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - config->utf8_mode = 1; - } - } -#endif -} - - static const char * get_stdio_errors(const _PyCoreConfig *config) { @@ -1365,7 +1204,7 @@ config_init_stdio_encoding(_PyCoreConfig *config) } /* UTF-8 Mode uses UTF-8/surrogateescape */ - if (config->utf8_mode) { + if (config->preconfig.utf8_mode) { if (config->stdio_encoding == NULL) { config->stdio_encoding = _PyMem_RawStrdup("utf-8"); if (config->stdio_encoding == NULL) { @@ -1403,7 +1242,7 @@ static _PyInitError config_init_fs_encoding(_PyCoreConfig *config) { #ifdef MS_WINDOWS - if (config->legacy_windows_fs_encoding) { + if (config->preconfig.legacy_windows_fs_encoding) { /* Legacy Windows filesystem encoding: mbcs/replace */ if (config->filesystem_encoding == NULL) { config->filesystem_encoding = _PyMem_RawStrdup("mbcs"); @@ -1438,7 +1277,7 @@ config_init_fs_encoding(_PyCoreConfig *config) } #else if (config->filesystem_encoding == NULL) { - if (config->utf8_mode) { + if (config->preconfig.utf8_mode) { /* UTF-8 Mode use: utf-8/surrogateescape */ config->filesystem_encoding = _PyMem_RawStrdup("utf-8"); /* errors defaults to surrogateescape above */ @@ -1539,12 +1378,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) config->user_site_directory = 0; } -#ifdef MS_WINDOWS - if (config->legacy_windows_fs_encoding) { - config->utf8_mode = 0; - } -#endif - if (config->preconfig.use_environment) { err = config_read_env_vars(config); if (_Py_INIT_FAILED(err)) { @@ -1565,13 +1398,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) return err; } - if (config->utf8_mode < 0) { - err = config_init_utf8_mode(config); - if (_Py_INIT_FAILED(err)) { - return err; - } - } - if (config->home == NULL) { err = config_init_home(config); if (_Py_INIT_FAILED(err)) { @@ -1593,10 +1419,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) } } - if (config->coerce_c_locale != 0 || config->utf8_mode < 0) { - config_init_locale(config); - } - if (config->_install_importlib) { err = _PyCoreConfig_InitPathConfig(config); if (_Py_INIT_FAILED(err)) { @@ -1623,12 +1445,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) if (config->tracemalloc < 0) { config->tracemalloc = 0; } - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 0; - } - if (config->utf8_mode < 0) { - config->utf8_mode = 0; - } if (config->argc < 0) { config->argc = 0; } @@ -1645,7 +1461,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) return err; } - assert(config->coerce_c_locale >= 0); assert(config->preconfig.use_environment >= 0); assert(config->filesystem_encoding != NULL); assert(config->filesystem_errors != NULL); @@ -1703,9 +1518,6 @@ config_init_stdio(const _PyCoreConfig *config) void _PyCoreConfig_Write(const _PyCoreConfig *config) { - if (config->coerce_c_locale) { - _Py_CoerceLegacyLocale(config->coerce_c_locale_warn); - } _PyCoreConfig_SetGlobalConfig(config); config_init_stdio(config); } @@ -1769,11 +1581,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) SET_ITEM_INT(show_alloc_count); SET_ITEM_INT(dump_refs); SET_ITEM_INT(malloc_stats); - SET_ITEM_INT(coerce_c_locale); - SET_ITEM_INT(coerce_c_locale_warn); SET_ITEM_STR(filesystem_encoding); SET_ITEM_STR(filesystem_errors); - SET_ITEM_INT(utf8_mode); SET_ITEM_WSTR(pycache_prefix); SET_ITEM_WSTR(program_name); SET_ITEM_WSTRLIST(argc, argv); @@ -1805,7 +1614,6 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) SET_ITEM_STR(stdio_encoding); SET_ITEM_STR(stdio_errors); #ifdef MS_WINDOWS - SET_ITEM_INT(legacy_windows_fs_encoding); SET_ITEM_INT(legacy_windows_stdio); #endif SET_ITEM_INT(skip_source_first_line); @@ -2318,33 +2126,6 @@ config_from_cmdline(_PyCoreConfig *config, _PyCmdline *cmdline, } -static _PyInitError -config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args, - const _PyPreConfig *preconfig) -{ - _PyInitError err; - - _PyCmdline cmdline; - memset(&cmdline, 0, sizeof(cmdline)); - cmdline.args = args; - - err = _PyArgv_Decode(cmdline.args, &cmdline.argv); - if (_Py_INIT_FAILED(err)) { - goto done; - } - - err = config_from_cmdline(config, &cmdline, preconfig); - if (_Py_INIT_FAILED(err)) { - goto done; - } - err = _Py_INIT_OK(); - -done: - cmdline_clear(&cmdline); - return err; -} - - /* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538). @@ -2358,118 +2139,23 @@ _PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args, const _PyPreConfig *preconfig) { _PyInitError err; - int init_utf8_mode = Py_UTF8Mode; -#ifdef MS_WINDOWS - int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag; -#endif - _PyCoreConfig save_config = _PyCoreConfig_INIT; - int locale_coerced = 0; - int loops = 0; - char *init_ctype_locale = NULL; - /* copy LC_CTYPE locale */ - const char *loc = setlocale(LC_CTYPE, NULL); - if (loc == NULL) { - err = _Py_INIT_ERR("failed to LC_CTYPE locale"); - goto done; - } - init_ctype_locale = _PyMem_RawStrdup(loc); - if (init_ctype_locale == NULL) { - err = _Py_INIT_NO_MEMORY(); - goto done; - } + _PyCmdline cmdline; + memset(&cmdline, 0, sizeof(cmdline)); + cmdline.args = args; - if (_PyCoreConfig_Copy(&save_config, config) < 0) { - err = _Py_INIT_NO_MEMORY(); + err = _PyArgv_Decode(cmdline.args, &cmdline.argv); + if (_Py_INIT_FAILED(err)) { goto done; } - /* Set LC_CTYPE to the user preferred locale */ - _Py_SetLocaleFromEnv(LC_CTYPE); - - while (1) { - int utf8_mode = config->utf8_mode; - int encoding_changed = 0; - - /* Watchdog to prevent an infinite loop */ - loops++; - if (loops == 3) { - err = _Py_INIT_ERR("Encoding changed twice while " - "reading the configuration"); - goto done; - } - - /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend - on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */ - Py_UTF8Mode = config->utf8_mode; -#ifdef MS_WINDOWS - Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding; -#endif - - err = config_read_from_argv_impl(config, args, preconfig); - if (_Py_INIT_FAILED(err)) { - goto done; - } - if (locale_coerced) { - config->coerce_c_locale = 1; - } - - /* The legacy C locale assumes ASCII as the default text encoding, which - * causes problems not only for the CPython runtime, but also other - * components like GNU readline. - * - * Accordingly, when the CLI detects it, it attempts to coerce it to a - * more capable UTF-8 based alternative. - * - * See the documentation of the PYTHONCOERCECLOCALE setting for more - * details. - */ - if (config->coerce_c_locale && !locale_coerced) { - locale_coerced = 1; - _Py_CoerceLegacyLocale(0); - encoding_changed = 1; - } - - if (utf8_mode == -1) { - if (config->utf8_mode == 1) { - /* UTF-8 Mode enabled */ - encoding_changed = 1; - } - } - else { - if (config->utf8_mode != utf8_mode) { - encoding_changed = 1; - } - } - - if (!encoding_changed) { - break; - } - - /* Reset the configuration before reading again the configuration, - just keep UTF-8 Mode value. */ - int new_utf8_mode = config->utf8_mode; - int new_coerce_c_locale = config->coerce_c_locale; - if (_PyCoreConfig_Copy(config, &save_config) < 0) { - err = _Py_INIT_NO_MEMORY(); - goto done; - } - config->utf8_mode = new_utf8_mode; - config->coerce_c_locale = new_coerce_c_locale; - - /* The encoding changed: read again the configuration - with the new encoding */ + err = config_from_cmdline(config, &cmdline, preconfig); + if (_Py_INIT_FAILED(err)) { + goto done; } err = _Py_INIT_OK(); done: - if (init_ctype_locale != NULL) { - setlocale(LC_CTYPE, init_ctype_locale); - } - _PyCoreConfig_Clear(&save_config); - Py_UTF8Mode = init_utf8_mode ; -#ifdef MS_WINDOWS - Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding; -#endif + cmdline_clear(&cmdline); return err; } |