From 5a02e0d1c8a526fc4e80a2fb8b4a9d5bc64c7d82 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 5 Mar 2019 12:32:09 +0100 Subject: bpo-36142: Add _PyPreConfig.utf8_mode (GH-12174) * Move the following fields from _PyCoreConfig to _PyPreConfig: * coerce_c_locale * coerce_c_locale_warn * legacy_windows_fs_encoding * utf8_mode * _PyPreConfig_ReadFromArgv() is now responsible for choosing the filesystem encoding * _PyPreConfig_Write() now sets the LC_CTYPE locale --- Include/cpython/coreconfig.h | 54 +++-- Include/internal/pycore_coreconfig.h | 13 ++ Programs/_testembed.c | 6 +- Python/coreconfig.c | 354 ++----------------------------- Python/preconfig.c | 400 ++++++++++++++++++++++++++++++++++- Python/pylifecycle.c | 2 +- Python/sysmodule.c | 2 +- 7 files changed, 465 insertions(+), 366 deletions(-) diff --git a/Include/cpython/coreconfig.h b/Include/cpython/coreconfig.h index 7997d59..306577c 100644 --- a/Include/cpython/coreconfig.h +++ b/Include/cpython/coreconfig.h @@ -60,12 +60,42 @@ typedef struct { Set to 0 by -E command line option. If set to -1 (default), it is set to !Py_IgnoreEnvironmentFlag. */ int use_environment; + + int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */ + int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ + +#ifdef MS_WINDOWS + /* If greater than zero, use the "mbcs" encoding instead of the UTF-8 + encoding for the filesystem encoding. + + Set to 1 if the PYTHONLEGACYWINDOWSFSENCODING environment variable is + set to a non-empty string. If set to -1 (default), inherit + Py_LegacyWindowsFSEncodingFlag value. + + See PEP 529 for more details. */ + int legacy_windows_fs_encoding; +#endif + + /* Enable UTF-8 mode? + Set by -X utf8 command line option and PYTHONUTF8 environment variable. + If set to -1 (default), inherit Py_UTF8Mode value. 
*/ + int utf8_mode; } _PyPreConfig; +#ifdef MS_WINDOWS +# define _PyPreConfig_WINDOWS_INIT \ + .legacy_windows_fs_encoding = -1, +#else +# define _PyPreConfig_WINDOWS_INIT +#endif + #define _PyPreConfig_INIT \ (_PyPreConfig){ \ + _PyPreConfig_WINDOWS_INIT \ .isolated = -1, \ - .use_environment = -1} + .use_environment = -1, \ + .coerce_c_locale = -1, \ + .utf8_mode = -1} /* --- _PyCoreConfig ---------------------------------------------- */ @@ -95,8 +125,6 @@ typedef struct { int show_alloc_count; /* -X showalloccount */ int dump_refs; /* PYTHONDUMPREFS */ int malloc_stats; /* PYTHONMALLOCSTATS */ - int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */ - int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ /* Python filesystem encoding and error handler: sys.getfilesystemencoding() and sys.getfilesystemencodeerrors(). @@ -134,11 +162,6 @@ typedef struct { char *filesystem_encoding; char *filesystem_errors; - /* Enable UTF-8 mode? - Set by -X utf8 command line option and PYTHONUTF8 environment variable. - If set to -1 (default), inherit Py_UTF8Mode value. */ - int utf8_mode; - wchar_t *pycache_prefix; /* PYTHONPYCACHEPREFIX, -X pycache_prefix=PATH */ wchar_t *program_name; /* Program name, see also Py_GetProgramName() */ @@ -277,16 +300,6 @@ typedef struct { char *stdio_errors; #ifdef MS_WINDOWS - /* If greater than 1, use the "mbcs" encoding instead of the UTF-8 - encoding for the filesystem encoding. - - Set to 1 if the PYTHONLEGACYWINDOWSFSENCODING environment variable is - set to a non-empty string. If set to -1 (default), inherit - Py_LegacyWindowsFSEncodingFlag value. - - See PEP 529 for more details. */ - int legacy_windows_fs_encoding; - /* If greater than zero, use io.FileIO instead of WindowsConsoleIO for sys standard streams. 
@@ -340,7 +353,6 @@ typedef struct { #ifdef MS_WINDOWS # define _PyCoreConfig_WINDOWS_INIT \ - .legacy_windows_fs_encoding = -1, \ .legacy_windows_stdio = -1, #else # define _PyCoreConfig_WINDOWS_INIT @@ -348,13 +360,12 @@ typedef struct { #define _PyCoreConfig_INIT \ (_PyCoreConfig){ \ + _PyCoreConfig_WINDOWS_INIT \ .preconfig = _PyPreConfig_INIT, \ .install_signal_handlers = 1, \ .use_hash_seed = -1, \ .faulthandler = -1, \ .tracemalloc = -1, \ - .coerce_c_locale = -1, \ - .utf8_mode = -1, \ .argc = -1, \ .nmodule_search_path = -1, \ .site_import = -1, \ @@ -368,7 +379,6 @@ typedef struct { .quiet = -1, \ .user_site_directory = -1, \ .buffered_stdio = -1, \ - _PyCoreConfig_WINDOWS_INIT \ ._install_importlib = 1, \ ._check_hash_pycs_mode = "default", \ ._frozen = -1} diff --git a/Include/internal/pycore_coreconfig.h b/Include/internal/pycore_coreconfig.h index 5135969..8df182c 100644 --- a/Include/internal/pycore_coreconfig.h +++ b/Include/internal/pycore_coreconfig.h @@ -36,11 +36,24 @@ PyAPI_FUNC(int) _Py_SetArgcArgv(int argc, wchar_t * const *argv); /* --- _PyPreConfig ----------------------------------------------- */ +PyAPI_FUNC(int) _Py_str_to_int( + const char *str, + int *result); +PyAPI_FUNC(const wchar_t*) _Py_get_xoption( + int nxoption, + wchar_t * const *xoptions, + const wchar_t *name); + PyAPI_FUNC(void) _PyPreConfig_Clear(_PyPreConfig *config); PyAPI_FUNC(int) _PyPreConfig_Copy(_PyPreConfig *config, const _PyPreConfig *config2); PyAPI_FUNC(void) _PyPreConfig_GetGlobalConfig(_PyPreConfig *config); PyAPI_FUNC(void) _PyPreConfig_SetGlobalConfig(const _PyPreConfig *config); +PyAPI_FUNC(const char*) _PyPreConfig_GetEnv(const _PyPreConfig *config, + const char *name); +PyAPI_FUNC(void) _Py_get_env_flag(_PyPreConfig *config, + int *flag, + const char *name); PyAPI_FUNC(_PyInitError) _PyPreConfig_Read(_PyPreConfig *config); PyAPI_FUNC(int) _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict); diff --git a/Programs/_testembed.c 
b/Programs/_testembed.c index 7b4d8c2..9923f8d 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -461,7 +461,7 @@ static int test_init_from_config(void) putenv("PYTHONUTF8=0"); Py_UTF8Mode = 0; - config.utf8_mode = 1; + config.preconfig.utf8_mode = 1; putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix"); config.pycache_prefix = L"conf_pycache_prefix"; @@ -610,8 +610,8 @@ static int test_init_isolated(void) config.preconfig.isolated = 1; /* Set coerce_c_locale and utf8_mode to not depend on the locale */ - config.coerce_c_locale = 0; - config.utf8_mode = 0; + config.preconfig.coerce_c_locale = 0; + config.preconfig.utf8_mode = 0; /* Use path starting with "./" avoids a search along the PATH */ config.program_name = L"./_testembed"; diff --git a/Python/coreconfig.c b/Python/coreconfig.c index a6aa89b..e372de4 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -531,10 +531,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_ATTR(dump_refs); COPY_ATTR(malloc_stats); - COPY_ATTR(coerce_c_locale); - COPY_ATTR(coerce_c_locale_warn); - COPY_ATTR(utf8_mode); - COPY_WSTR_ATTR(pycache_prefix); COPY_WSTR_ATTR(module_search_path_env); COPY_WSTR_ATTR(home); @@ -571,7 +567,6 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) COPY_STR_ATTR(stdio_encoding); COPY_STR_ATTR(stdio_errors); #ifdef MS_WINDOWS - COPY_ATTR(legacy_windows_fs_encoding); COPY_ATTR(legacy_windows_stdio); #endif COPY_ATTR(skip_source_first_line); @@ -592,19 +587,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2) const char* _PyCoreConfig_GetEnv(const _PyCoreConfig *config, const char *name) { - assert(config->preconfig.use_environment >= 0); - - if (!config->preconfig.use_environment) { - return NULL; - } - - const char *var = getenv(name); - if (var && var[0] != '\0') { - return var; - } - else { - return NULL; - } + return _PyPreConfig_GetEnv(&config->preconfig, name); } @@ -670,7 +653,6 @@ 
_PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config) config->ATTR = !(VALUE); \ } - COPY_FLAG(utf8_mode, Py_UTF8Mode); COPY_FLAG(bytes_warning, Py_BytesWarningFlag); COPY_FLAG(inspect, Py_InspectFlag); COPY_FLAG(interactive, Py_InteractiveFlag); @@ -679,7 +661,6 @@ _PyCoreConfig_GetGlobalConfig(_PyCoreConfig *config) COPY_FLAG(verbose, Py_VerboseFlag); COPY_FLAG(quiet, Py_QuietFlag); #ifdef MS_WINDOWS - COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag); #endif COPY_FLAG(_frozen, Py_FrozenFlag); @@ -709,7 +690,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config) VAR = !config->ATTR; \ } - COPY_FLAG(utf8_mode, Py_UTF8Mode); COPY_FLAG(bytes_warning, Py_BytesWarningFlag); COPY_FLAG(inspect, Py_InspectFlag); COPY_FLAG(interactive, Py_InteractiveFlag); @@ -718,7 +698,6 @@ _PyCoreConfig_SetGlobalConfig(const _PyCoreConfig *config) COPY_FLAG(verbose, Py_VerboseFlag); COPY_FLAG(quiet, Py_QuietFlag); #ifdef MS_WINDOWS - COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); COPY_FLAG(legacy_windows_stdio, Py_LegacyWindowsStdioFlag); #endif COPY_FLAG(_frozen, Py_FrozenFlag); @@ -838,23 +817,7 @@ config_init_executable(_PyCoreConfig *config) static const wchar_t* config_get_xoption(const _PyCoreConfig *config, wchar_t *name) { - int nxoption = config->nxoption; - wchar_t **xoptions = config->xoptions; - for (int i=0; i < nxoption; i++) { - wchar_t *option = xoptions[i]; - size_t len; - wchar_t *sep = wcschr(option, L'='); - if (sep != NULL) { - len = (sep - option); - } - else { - len = wcslen(option); - } - if (wcsncmp(option, name, len) == 0 && name[len] == L'\0') { - return option; - } - } - return NULL; + return _Py_get_xoption(config->nxoption, config->xoptions, name); } @@ -915,67 +878,6 @@ config_init_hash_seed(_PyCoreConfig *config) } -static _PyInitError -config_init_utf8_mode(_PyCoreConfig *config) -{ - const wchar_t *xopt = config_get_xoption(config, L"utf8"); 
- if (xopt) { - wchar_t *sep = wcschr(xopt, L'='); - if (sep) { - xopt = sep + 1; - if (wcscmp(xopt, L"1") == 0) { - config->utf8_mode = 1; - } - else if (wcscmp(xopt, L"0") == 0) { - config->utf8_mode = 0; - } - else { - return _Py_INIT_USER_ERR("invalid -X utf8 option value"); - } - } - else { - config->utf8_mode = 1; - } - return _Py_INIT_OK(); - } - - const char *opt = _PyCoreConfig_GetEnv(config, "PYTHONUTF8"); - if (opt) { - if (strcmp(opt, "1") == 0) { - config->utf8_mode = 1; - } - else if (strcmp(opt, "0") == 0) { - config->utf8_mode = 0; - } - else { - return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment " - "variable value"); - } - return _Py_INIT_OK(); - } - - return _Py_INIT_OK(); -} - - -static int -config_str_to_int(const char *str, int *result) -{ - const char *endptr = str; - errno = 0; - long value = strtol(str, (char **)&endptr, 10); - if (*endptr != '\0' || errno == ERANGE) { - return -1; - } - if (value < INT_MIN || value > INT_MAX) { - return -1; - } - - *result = (int)value; - return 0; -} - - static int config_wstr_to_int(const wchar_t *wstr, int *result) { @@ -994,27 +896,12 @@ config_wstr_to_int(const wchar_t *wstr, int *result) } -static void -get_env_flag(_PyCoreConfig *config, int *flag, const char *name) -{ - const char *var = _PyCoreConfig_GetEnv(config, name); - if (!var) { - return; - } - int value; - if (config_str_to_int(var, &value) < 0 || value < 0) { - /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */ - value = 1; - } - if (*flag < value) { - *flag = value; - } -} - - static _PyInitError config_read_env_vars(_PyCoreConfig *config) { +#define get_env_flag(CONFIG, ATTR, NAME) \ + _Py_get_env_flag(&(CONFIG)->preconfig, (ATTR), (NAME)) + /* Get environment variables */ get_env_flag(config, &config->parser_debug, "PYTHONDEBUG"); get_env_flag(config, &config->verbose, "PYTHONVERBOSE"); @@ -1040,8 +927,6 @@ config_read_env_vars(_PyCoreConfig *config) } #ifdef MS_WINDOWS - get_env_flag(config, 
&config->legacy_windows_fs_encoding, - "PYTHONLEGACYWINDOWSFSENCODING"); get_env_flag(config, &config->legacy_windows_stdio, "PYTHONLEGACYWINDOWSSTDIO"); #endif @@ -1057,23 +942,6 @@ config_read_env_vars(_PyCoreConfig *config) config->malloc_stats = 1; } - const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); - if (env) { - if (strcmp(env, "0") == 0) { - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 0; - } - } - else if (strcmp(env, "warn") == 0) { - config->coerce_c_locale_warn = 1; - } - else { - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 1; - } - } - } - wchar_t *path; int res = _PyCoreConfig_GetEnvDup(config, &path, L"PYTHONPATH", "PYTHONPATH"); @@ -1090,6 +958,8 @@ config_read_env_vars(_PyCoreConfig *config) } return _Py_INIT_OK(); + +#undef get_env_flag } @@ -1101,7 +971,7 @@ config_init_tracemalloc(_PyCoreConfig *config) const char *env = _PyCoreConfig_GetEnv(config, "PYTHONTRACEMALLOC"); if (env) { - if (!config_str_to_int(env, &nframe)) { + if (!_Py_str_to_int(env, &nframe)) { valid = (nframe >= 0); } else { @@ -1213,37 +1083,6 @@ config_read_complex_options(_PyCoreConfig *config) } -static void -config_init_locale(_PyCoreConfig *config) -{ - /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't - imply that the C locale is always coerced. It is only coerced if - if the LC_CTYPE locale is "C". 
*/ - if (config->coerce_c_locale != 0) { - /* The C locale enables the C locale coercion (PEP 538) */ - if (_Py_LegacyLocaleDetected()) { - config->coerce_c_locale = 1; - } - else { - config->coerce_c_locale = 0; - } - } - -#ifndef MS_WINDOWS - if (config->utf8_mode < 0) { - /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ - const char *ctype_loc = setlocale(LC_CTYPE, NULL); - if (ctype_loc != NULL - && (strcmp(ctype_loc, "C") == 0 - || strcmp(ctype_loc, "POSIX") == 0)) - { - config->utf8_mode = 1; - } - } -#endif -} - - static const char * get_stdio_errors(const _PyCoreConfig *config) { @@ -1365,7 +1204,7 @@ config_init_stdio_encoding(_PyCoreConfig *config) } /* UTF-8 Mode uses UTF-8/surrogateescape */ - if (config->utf8_mode) { + if (config->preconfig.utf8_mode) { if (config->stdio_encoding == NULL) { config->stdio_encoding = _PyMem_RawStrdup("utf-8"); if (config->stdio_encoding == NULL) { @@ -1403,7 +1242,7 @@ static _PyInitError config_init_fs_encoding(_PyCoreConfig *config) { #ifdef MS_WINDOWS - if (config->legacy_windows_fs_encoding) { + if (config->preconfig.legacy_windows_fs_encoding) { /* Legacy Windows filesystem encoding: mbcs/replace */ if (config->filesystem_encoding == NULL) { config->filesystem_encoding = _PyMem_RawStrdup("mbcs"); @@ -1438,7 +1277,7 @@ config_init_fs_encoding(_PyCoreConfig *config) } #else if (config->filesystem_encoding == NULL) { - if (config->utf8_mode) { + if (config->preconfig.utf8_mode) { /* UTF-8 Mode use: utf-8/surrogateescape */ config->filesystem_encoding = _PyMem_RawStrdup("utf-8"); /* errors defaults to surrogateescape above */ @@ -1539,12 +1378,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) config->user_site_directory = 0; } -#ifdef MS_WINDOWS - if (config->legacy_windows_fs_encoding) { - config->utf8_mode = 0; - } -#endif - if (config->preconfig.use_environment) { err = config_read_env_vars(config); if (_Py_INIT_FAILED(err)) { @@ -1565,13 +1398,6 @@ 
_PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) return err; } - if (config->utf8_mode < 0) { - err = config_init_utf8_mode(config); - if (_Py_INIT_FAILED(err)) { - return err; - } - } - if (config->home == NULL) { err = config_init_home(config); if (_Py_INIT_FAILED(err)) { @@ -1593,10 +1419,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) } } - if (config->coerce_c_locale != 0 || config->utf8_mode < 0) { - config_init_locale(config); - } - if (config->_install_importlib) { err = _PyCoreConfig_InitPathConfig(config); if (_Py_INIT_FAILED(err)) { @@ -1623,12 +1445,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) if (config->tracemalloc < 0) { config->tracemalloc = 0; } - if (config->coerce_c_locale < 0) { - config->coerce_c_locale = 0; - } - if (config->utf8_mode < 0) { - config->utf8_mode = 0; - } if (config->argc < 0) { config->argc = 0; } @@ -1645,7 +1461,6 @@ _PyCoreConfig_Read(_PyCoreConfig *config, const _PyPreConfig *preconfig) return err; } - assert(config->coerce_c_locale >= 0); assert(config->preconfig.use_environment >= 0); assert(config->filesystem_encoding != NULL); assert(config->filesystem_errors != NULL); @@ -1703,9 +1518,6 @@ config_init_stdio(const _PyCoreConfig *config) void _PyCoreConfig_Write(const _PyCoreConfig *config) { - if (config->coerce_c_locale) { - _Py_CoerceLegacyLocale(config->coerce_c_locale_warn); - } _PyCoreConfig_SetGlobalConfig(config); config_init_stdio(config); } @@ -1769,11 +1581,8 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) SET_ITEM_INT(show_alloc_count); SET_ITEM_INT(dump_refs); SET_ITEM_INT(malloc_stats); - SET_ITEM_INT(coerce_c_locale); - SET_ITEM_INT(coerce_c_locale_warn); SET_ITEM_STR(filesystem_encoding); SET_ITEM_STR(filesystem_errors); - SET_ITEM_INT(utf8_mode); SET_ITEM_WSTR(pycache_prefix); SET_ITEM_WSTR(program_name); SET_ITEM_WSTRLIST(argc, argv); @@ -1805,7 +1614,6 @@ _PyCoreConfig_AsDict(const _PyCoreConfig *config) 
SET_ITEM_STR(stdio_encoding); SET_ITEM_STR(stdio_errors); #ifdef MS_WINDOWS - SET_ITEM_INT(legacy_windows_fs_encoding); SET_ITEM_INT(legacy_windows_stdio); #endif SET_ITEM_INT(skip_source_first_line); @@ -2318,33 +2126,6 @@ config_from_cmdline(_PyCoreConfig *config, _PyCmdline *cmdline, } -static _PyInitError -config_read_from_argv_impl(_PyCoreConfig *config, const _PyArgv *args, - const _PyPreConfig *preconfig) -{ - _PyInitError err; - - _PyCmdline cmdline; - memset(&cmdline, 0, sizeof(cmdline)); - cmdline.args = args; - - err = _PyArgv_Decode(cmdline.args, &cmdline.argv); - if (_Py_INIT_FAILED(err)) { - goto done; - } - - err = config_from_cmdline(config, &cmdline, preconfig); - if (_Py_INIT_FAILED(err)) { - goto done; - } - err = _Py_INIT_OK(); - -done: - cmdline_clear(&cmdline); - return err; -} - - /* Read the configuration into _PyCoreConfig and initialize the LC_CTYPE locale: enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538). @@ -2358,118 +2139,23 @@ _PyCoreConfig_ReadFromArgv(_PyCoreConfig *config, const _PyArgv *args, const _PyPreConfig *preconfig) { _PyInitError err; - int init_utf8_mode = Py_UTF8Mode; -#ifdef MS_WINDOWS - int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag; -#endif - _PyCoreConfig save_config = _PyCoreConfig_INIT; - int locale_coerced = 0; - int loops = 0; - char *init_ctype_locale = NULL; - /* copy LC_CTYPE locale */ - const char *loc = setlocale(LC_CTYPE, NULL); - if (loc == NULL) { - err = _Py_INIT_ERR("failed to LC_CTYPE locale"); - goto done; - } - init_ctype_locale = _PyMem_RawStrdup(loc); - if (init_ctype_locale == NULL) { - err = _Py_INIT_NO_MEMORY(); - goto done; - } + _PyCmdline cmdline; + memset(&cmdline, 0, sizeof(cmdline)); + cmdline.args = args; - if (_PyCoreConfig_Copy(&save_config, config) < 0) { - err = _Py_INIT_NO_MEMORY(); + err = _PyArgv_Decode(cmdline.args, &cmdline.argv); + if (_Py_INIT_FAILED(err)) { goto done; } - /* Set LC_CTYPE to the user preferred locale */ - 
_Py_SetLocaleFromEnv(LC_CTYPE); - - while (1) { - int utf8_mode = config->utf8_mode; - int encoding_changed = 0; - - /* Watchdog to prevent an infinite loop */ - loops++; - if (loops == 3) { - err = _Py_INIT_ERR("Encoding changed twice while " - "reading the configuration"); - goto done; - } - - /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend - on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */ - Py_UTF8Mode = config->utf8_mode; -#ifdef MS_WINDOWS - Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding; -#endif - - err = config_read_from_argv_impl(config, args, preconfig); - if (_Py_INIT_FAILED(err)) { - goto done; - } - if (locale_coerced) { - config->coerce_c_locale = 1; - } - - /* The legacy C locale assumes ASCII as the default text encoding, which - * causes problems not only for the CPython runtime, but also other - * components like GNU readline. - * - * Accordingly, when the CLI detects it, it attempts to coerce it to a - * more capable UTF-8 based alternative. - * - * See the documentation of the PYTHONCOERCECLOCALE setting for more - * details. - */ - if (config->coerce_c_locale && !locale_coerced) { - locale_coerced = 1; - _Py_CoerceLegacyLocale(0); - encoding_changed = 1; - } - - if (utf8_mode == -1) { - if (config->utf8_mode == 1) { - /* UTF-8 Mode enabled */ - encoding_changed = 1; - } - } - else { - if (config->utf8_mode != utf8_mode) { - encoding_changed = 1; - } - } - - if (!encoding_changed) { - break; - } - - /* Reset the configuration before reading again the configuration, - just keep UTF-8 Mode value. 
*/ - int new_utf8_mode = config->utf8_mode; - int new_coerce_c_locale = config->coerce_c_locale; - if (_PyCoreConfig_Copy(config, &save_config) < 0) { - err = _Py_INIT_NO_MEMORY(); - goto done; - } - config->utf8_mode = new_utf8_mode; - config->coerce_c_locale = new_coerce_c_locale; - - /* The encoding changed: read again the configuration - with the new encoding */ + err = config_from_cmdline(config, &cmdline, preconfig); + if (_Py_INIT_FAILED(err)) { + goto done; } err = _Py_INIT_OK(); done: - if (init_ctype_locale != NULL) { - setlocale(LC_CTYPE, init_ctype_locale); - } - _PyCoreConfig_Clear(&save_config); - Py_UTF8Mode = init_utf8_mode ; -#ifdef MS_WINDOWS - Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding; -#endif + cmdline_clear(&cmdline); return err; } diff --git a/Python/preconfig.c b/Python/preconfig.c index af70f38..3befecf 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -1,6 +1,8 @@ #include "Python.h" #include "pycore_coreconfig.h" #include "pycore_getopt.h" +#include "pycore_pystate.h" /* _PyRuntime_Initialize() */ +#include /* setlocale() */ #define DECODE_LOCALE_ERR(NAME, LEN) \ @@ -99,6 +101,8 @@ typedef struct { const _PyArgv *args; int argc; wchar_t **argv; + int nxoption; /* Number of -X options */ + wchar_t **xoptions; /* -X options */ } _PyPreCmdline; @@ -109,6 +113,10 @@ precmdline_clear(_PyPreCmdline *cmdline) _Py_wstrlist_clear(cmdline->args->argc, cmdline->argv); } cmdline->argv = NULL; + + _Py_wstrlist_clear(cmdline->nxoption, cmdline->xoptions); + cmdline->nxoption = 0; + cmdline->xoptions = NULL; } @@ -129,6 +137,12 @@ _PyPreConfig_Copy(_PyPreConfig *config, const _PyPreConfig *config2) COPY_ATTR(isolated); COPY_ATTR(use_environment); + COPY_ATTR(coerce_c_locale); + COPY_ATTR(coerce_c_locale_warn); +#ifdef MS_WINDOWS + COPY_ATTR(legacy_windows_fs_encoding); +#endif + COPY_ATTR(utf8_mode); #undef COPY_ATTR return 0; @@ -149,6 +163,10 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config) COPY_FLAG(isolated, 
Py_IsolatedFlag); COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag); +#ifdef MS_WINDOWS + COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); +#endif + COPY_FLAG(utf8_mode, Py_UTF8Mode); #undef COPY_FLAG #undef COPY_NOT_FLAG @@ -169,14 +187,161 @@ _PyPreConfig_SetGlobalConfig(const _PyPreConfig *config) COPY_FLAG(isolated, Py_IsolatedFlag); COPY_NOT_FLAG(use_environment, Py_IgnoreEnvironmentFlag); +#ifdef MS_WINDOWS + COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag); +#endif + COPY_FLAG(utf8_mode, Py_UTF8Mode); #undef COPY_FLAG #undef COPY_NOT_FLAG } -_PyInitError -_PyPreConfig_Read(_PyPreConfig *config) +const char* +_PyPreConfig_GetEnv(const _PyPreConfig *config, const char *name) +{ + assert(config->use_environment >= 0); + + if (!config->use_environment) { + return NULL; + } + + const char *var = getenv(name); + if (var && var[0] != '\0') { + return var; + } + else { + return NULL; + } +} + + +int +_Py_str_to_int(const char *str, int *result) +{ + const char *endptr = str; + errno = 0; + long value = strtol(str, (char **)&endptr, 10); + if (*endptr != '\0' || errno == ERANGE) { + return -1; + } + if (value < INT_MIN || value > INT_MAX) { + return -1; + } + + *result = (int)value; + return 0; +} + + +void +_Py_get_env_flag(_PyPreConfig *config, int *flag, const char *name) +{ + const char *var = _PyPreConfig_GetEnv(config, name); + if (!var) { + return; + } + int value; + if (_Py_str_to_int(var, &value) < 0 || value < 0) { + /* PYTHONDEBUG=text and PYTHONDEBUG=-2 behave as PYTHONDEBUG=1 */ + value = 1; + } + if (*flag < value) { + *flag = value; + } +} + + +const wchar_t* +_Py_get_xoption(int nxoption, wchar_t * const *xoptions, const wchar_t *name) +{ + for (int i=0; i < nxoption; i++) { + const wchar_t *option = xoptions[i]; + size_t len; + wchar_t *sep = wcschr(option, L'='); + if (sep != NULL) { + len = (sep - option); + } + else { + len = wcslen(option); + } + if (wcsncmp(option, name, len) == 0 && name[len] == 
L'\0') { + return option; + } + } + return NULL; +} + + +static _PyInitError +preconfig_init_utf8_mode(_PyPreConfig *config, const _PyPreCmdline *cmdline) +{ + const wchar_t *xopt; + if (cmdline) { + xopt = _Py_get_xoption(cmdline->nxoption, cmdline->xoptions, L"utf8"); + } + else { + xopt = NULL; + } + if (xopt) { + wchar_t *sep = wcschr(xopt, L'='); + if (sep) { + xopt = sep + 1; + if (wcscmp(xopt, L"1") == 0) { + config->utf8_mode = 1; + } + else if (wcscmp(xopt, L"0") == 0) { + config->utf8_mode = 0; + } + else { + return _Py_INIT_USER_ERR("invalid -X utf8 option value"); + } + } + else { + config->utf8_mode = 1; + } + return _Py_INIT_OK(); + } + + const char *opt = _PyPreConfig_GetEnv(config, "PYTHONUTF8"); + if (opt) { + if (strcmp(opt, "1") == 0) { + config->utf8_mode = 1; + } + else if (strcmp(opt, "0") == 0) { + config->utf8_mode = 0; + } + else { + return _Py_INIT_USER_ERR("invalid PYTHONUTF8 environment " + "variable value"); + } + return _Py_INIT_OK(); + } + + return _Py_INIT_OK(); +} + + +static void +preconfig_init_locale(_PyPreConfig *config) +{ + /* Test also if coerce_c_locale equals 1: PYTHONCOERCECLOCALE=1 doesn't + imply that the C locale is always coerced. It is only coerced if + the LC_CTYPE locale is "C". 
*/ + if (config->coerce_c_locale != 0) { + /* The C locale enables the C locale coercion (PEP 538) */ + if (_Py_LegacyLocaleDetected()) { + config->coerce_c_locale = 1; + } + else { + config->coerce_c_locale = 0; + } + } +} + + +static _PyInitError +preconfig_read(_PyPreConfig *config, const _PyPreCmdline *cmdline) { _PyPreConfig_GetGlobalConfig(config); @@ -189,6 +354,69 @@ _PyPreConfig_Read(_PyPreConfig *config) config->use_environment = 0; } + if (config->use_environment) { +#ifdef MS_WINDOWS + _Py_get_env_flag(config, &config->legacy_windows_fs_encoding, + "PYTHONLEGACYWINDOWSFSENCODING"); +#endif + + const char *env = _PyPreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); + if (env) { + if (strcmp(env, "0") == 0) { + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 0; + } + } + else if (strcmp(env, "warn") == 0) { + config->coerce_c_locale_warn = 1; + } + else { + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 1; + } + } + } + } + +#ifdef MS_WINDOWS + if (config->legacy_windows_fs_encoding) { + config->utf8_mode = 0; + } +#endif + + if (config->utf8_mode < 0) { + _PyInitError err = preconfig_init_utf8_mode(config, cmdline); + if (_Py_INIT_FAILED(err)) { + return err; + } + } + + if (config->coerce_c_locale != 0) { + preconfig_init_locale(config); + } + +#ifndef MS_WINDOWS + if (config->utf8_mode < 0) { + /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + if (ctype_loc != NULL + && (strcmp(ctype_loc, "C") == 0 + || strcmp(ctype_loc, "POSIX") == 0)) + { + config->utf8_mode = 1; + } + } +#endif + + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 0; + } + if (config->utf8_mode < 0) { + config->utf8_mode = 0; + } + + assert(config->coerce_c_locale >= 0); + assert(config->utf8_mode >= 0); assert(config->isolated >= 0); assert(config->use_environment >= 0); @@ -196,6 +424,13 @@ _PyPreConfig_Read(_PyPreConfig *config) } +_PyInitError 
+_PyPreConfig_Read(_PyPreConfig *config) +{ + return preconfig_read(config, NULL); +} + + int _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict) { @@ -216,6 +451,12 @@ _PyPreConfig_AsDict(const _PyPreConfig *config, PyObject *dict) SET_ITEM_INT(isolated); SET_ITEM_INT(use_environment); + SET_ITEM_INT(coerce_c_locale); + SET_ITEM_INT(coerce_c_locale_warn); + SET_ITEM_INT(utf8_mode); +#ifdef MS_WINDOWS + SET_ITEM_INT(legacy_windows_fs_encoding); +#endif return 0; fail: @@ -251,6 +492,18 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline) config->isolated++; break; + case 'X': + { + _PyInitError err; + err = _Py_wstrlist_append(&cmdline->nxoption, + &cmdline->xoptions, + _PyOS_optarg); + if (_Py_INIT_FAILED(err)) { + return err; + } + break; + } + default: /* ignore other argument: handled by _PyCoreConfig_ReadFromArgv() */ @@ -262,8 +515,8 @@ preconfig_parse_cmdline(_PyPreConfig *config, _PyPreCmdline *cmdline) } -_PyInitError -_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) +static _PyInitError +preconfig_from_argv(_PyPreConfig *config, const _PyArgv *args) { _PyInitError err; @@ -281,7 +534,7 @@ _PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) goto done; } - err = _PyPreConfig_Read(config); + err = preconfig_read(config, &cmdline); if (_Py_INIT_FAILED(err)) { goto done; } @@ -293,7 +546,144 @@ done: } +/* Read the preconfiguration. 
*/ +_PyInitError +_PyPreConfig_ReadFromArgv(_PyPreConfig *config, const _PyArgv *args) +{ + _PyInitError err; + + err = _PyRuntime_Initialize(); + if (_Py_INIT_FAILED(err)) { + return err; + } + + char *init_ctype_locale = NULL; + int init_utf8_mode = Py_UTF8Mode; +#ifdef MS_WINDOWS + int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag; +#endif + _PyPreConfig save_config = _PyPreConfig_INIT; + int locale_coerced = 0; + int loops = 0; + + /* copy LC_CTYPE locale */ + const char *loc = setlocale(LC_CTYPE, NULL); + if (loc == NULL) { + err = _Py_INIT_ERR("failed to LC_CTYPE locale"); + goto done; + } + init_ctype_locale = _PyMem_RawStrdup(loc); + if (init_ctype_locale == NULL) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + + if (_PyPreConfig_Copy(&save_config, config) < 0) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + + /* Set LC_CTYPE to the user preferred locale */ + _Py_SetLocaleFromEnv(LC_CTYPE); + + while (1) { + int utf8_mode = config->utf8_mode; + + /* Watchdog to prevent an infinite loop */ + loops++; + if (loops == 3) { + err = _Py_INIT_ERR("Encoding changed twice while " + "reading the configuration"); + goto done; + } + + /* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend + on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */ + Py_UTF8Mode = config->utf8_mode; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding; +#endif + + err = preconfig_from_argv(config, args); + if (_Py_INIT_FAILED(err)) { + goto done; + } + + if (locale_coerced) { + config->coerce_c_locale = 1; + } + + /* The legacy C locale assumes ASCII as the default text encoding, which + * causes problems not only for the CPython runtime, but also other + * components like GNU readline. + * + * Accordingly, when the CLI detects it, it attempts to coerce it to a + * more capable UTF-8 based alternative. + * + * See the documentation of the PYTHONCOERCECLOCALE setting for more + * details. 
+ */ + int encoding_changed = 0; + if (config->coerce_c_locale && !locale_coerced) { + locale_coerced = 1; + _Py_CoerceLegacyLocale(0); + encoding_changed = 1; + } + + if (utf8_mode == -1) { + if (config->utf8_mode == 1) { + /* UTF-8 Mode enabled */ + encoding_changed = 1; + } + } + else { + if (config->utf8_mode != utf8_mode) { + encoding_changed = 1; + } + } + + if (!encoding_changed) { + break; + } + + /* Reset the configuration before reading again the configuration, + just keep UTF-8 Mode value. */ + int new_utf8_mode = config->utf8_mode; + int new_coerce_c_locale = config->coerce_c_locale; + if (_PyPreConfig_Copy(config, &save_config) < 0) { + err = _Py_INIT_NO_MEMORY(); + goto done; + } + config->utf8_mode = new_utf8_mode; + config->coerce_c_locale = new_coerce_c_locale; + + /* The encoding changed: read again the configuration + with the new encoding */ + } + err = _Py_INIT_OK(); + +done: + if (init_ctype_locale != NULL) { + setlocale(LC_CTYPE, init_ctype_locale); + } + _PyPreConfig_Clear(&save_config); + Py_UTF8Mode = init_utf8_mode ; +#ifdef MS_WINDOWS + Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding; +#endif + return err; +} + + void _PyPreConfig_Write(const _PyPreConfig *config) { + _PyPreConfig_SetGlobalConfig(config); + + if (config->coerce_c_locale) { + _Py_CoerceLegacyLocale(config->coerce_c_locale_warn); + } + + /* Set LC_CTYPE to the user preferred locale */ + _Py_SetLocaleFromEnv(LC_CTYPE); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 7cf4a6d..dec8904 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -287,7 +287,7 @@ static const char *_C_LOCALE_WARNING = static void _emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config) { - if (core_config->coerce_c_locale_warn && _Py_LegacyLocaleDetected()) { + if (core_config->preconfig.coerce_c_locale_warn && _Py_LegacyLocaleDetected()) { PySys_FormatStderr("%s", _C_LOCALE_WARNING); } } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 
4b12280..50ba1a7 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2181,7 +2181,7 @@ make_flags(void) SetFlag(config->use_hash_seed == 0 || config->hash_seed != 0); SetFlag(config->preconfig.isolated); PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode)); - SetFlag(config->utf8_mode); + SetFlag(config->preconfig.utf8_mode); #undef SetFlag if (PyErr_Occurred()) { -- cgit v0.12