diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2017-12-16 03:54:22 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-16 03:54:22 (GMT) |
commit | 9454060e84a669dde63824d9e2fcaf295e34f687 (patch) | |
tree | 4c40a6e1bd11aa75819acb7efce4981fc6ba7611 /Python | |
parent | e796b2fe26f220107ac50667de6cc86c82b465e3 (diff) | |
download | cpython-9454060e84a669dde63824d9e2fcaf295e34f687.zip cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.gz cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.bz2 |
bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)
bpo-29240, bpo-32030: If the encoding changes (C locale coerced or
UTF-8 Mode changed), Py_Main() now re-reads the configuration with
the new encoding.
Changes:
* Add _Py_UnixMain() called by main().
* Rename pymain_free_pymain() to pymain_clear_pymain(), it can now be
called multiple times.
* Rename pymain_parse_cmdline_envvars() to pymain_read_conf().
* Py_Main() now clears orig_argc and orig_argv at exit.
* Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is
no longer any need for two copies of the wchar_t** argv.
* _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn.
* Py_UTF8Mode is now initialized to -1.
* Locale coercion (PEP 538) now respects -I and -E options.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bltinmodule.c | 7 | ||||
-rw-r--r-- | Python/fileutils.c | 4 | ||||
-rw-r--r-- | Python/pylifecycle.c | 80 |
3 files changed, 34 insertions, 57 deletions
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 604493d..e702f7c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ int Py_HasFileSystemDefaultEncoding = 0; #endif const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; -/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin - and stdout error handler to "surrogateescape". */ -int Py_UTF8Mode = 0; +/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change + stdin and stdout error handler to "surrogateescape". It is equal to + -1 by default: unknown, will be set by Py_Main() */ +int Py_UTF8Mode = -1; _Py_IDENTIFIER(__builtins__); _Py_IDENTIFIER(__dict__); diff --git a/Python/fileutils.c b/Python/fileutils.c index 4b69049..c4d495d 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size) #if defined(__APPLE__) || defined(__ANDROID__) return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); #else - if (Py_UTF8Mode) { + if (Py_UTF8Mode == 1) { return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); } @@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos) #if defined(__APPLE__) || defined(__ANDROID__) return _Py_EncodeLocaleUTF8(text, error_pos); #else /* __APPLE__ */ - if (Py_UTF8Mode) { + if (Py_UTF8Mode == 1) { return _Py_EncodeLocaleUTF8(text, error_pos); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8c62607..6500995 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING = "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " "locales is recommended.\n"; -static int -_legacy_locale_warnings_enabled(void) -{ - const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); - return (coerce_c_locale != NULL && - strncmp(coerce_c_locale, "warn", 5) == 0); 
-} - static void -_emit_stderr_warning_for_legacy_locale(void) +_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config) { - if (_legacy_locale_warnings_enabled()) { + if (core_config->coerce_c_locale_warn) { if (_Py_LegacyLocaleDetected()) { fprintf(stderr, "%s", _C_LOCALE_WARNING); } @@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void) } #ifdef PY_COERCE_C_LOCALE -static const char _C_LOCALE_COERCION_WARNING[] = +static const char C_LOCALE_COERCION_WARNING[] = "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale " "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n"; static void -_coerce_default_locale_settings(const _LocaleCoercionTarget *target) +_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target) { const char *newloc = target->locale_name; @@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) "Error setting LC_CTYPE, skipping C locale coercion\n"); return; } - if (_legacy_locale_warnings_enabled()) { - fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); + if (config->coerce_c_locale_warn) { + fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc); } /* Reconfigure with the overridden environment variables */ @@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) #endif void -_Py_CoerceLegacyLocale(void) +_Py_CoerceLegacyLocale(const _PyCoreConfig *config) { #ifdef PY_COERCE_C_LOCALE - /* We ignore the Python -E and -I flags here, as the CLI needs to sort out - * the locale settings *before* we try to do anything with the command - * line arguments. For cross-platform debugging purposes, we also need - * to give end users a way to force even scripts that are otherwise - * isolated from their environment to use the legacy ASCII-centric C - * locale. 
- * - * Ignoring -E and -I is safe from a security perspective, as we only use - * the setting to turn *off* the implicit locale coercion, and anyone with - * access to the process environment already has the ability to set - * `LC_ALL=C` to override the C level locale settings anyway. - */ - const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); - if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { - /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */ - const char *locale_override = getenv("LC_ALL"); - if (locale_override == NULL || *locale_override == '\0') { - /* LC_ALL is also not set (or is set to an empty string) */ - const _LocaleCoercionTarget *target = NULL; - for (target = _TARGET_LOCALES; target->locale_name; target++) { - const char *new_locale = setlocale(LC_CTYPE, - target->locale_name); - if (new_locale != NULL) { + const char *locale_override = getenv("LC_ALL"); + if (locale_override == NULL || *locale_override == '\0') { + /* LC_ALL is also not set (or is set to an empty string) */ + const _LocaleCoercionTarget *target = NULL; + for (target = _TARGET_LOCALES; target->locale_name; target++) { + const char *new_locale = setlocale(LC_CTYPE, + target->locale_name); + if (new_locale != NULL) { #if !defined(__APPLE__) && !defined(__ANDROID__) && \ - defined(HAVE_LANGINFO_H) && defined(CODESET) - /* Also ensure that nl_langinfo works in this locale */ - char *codeset = nl_langinfo(CODESET); - if (!codeset || *codeset == '\0') { - /* CODESET is not set or empty, so skip coercion */ - new_locale = NULL; - _Py_SetLocaleFromEnv(LC_CTYPE); - continue; - } -#endif - /* Successfully configured locale, so make it the default */ - _coerce_default_locale_settings(target); - return; +defined(HAVE_LANGINFO_H) && defined(CODESET) + /* Also ensure that nl_langinfo works in this locale */ + char *codeset = nl_langinfo(CODESET); + if (!codeset || *codeset == '\0') { + /* CODESET is not set or empty, so skip coercion */ + 
new_locale = NULL; + _Py_SetLocaleFromEnv(LC_CTYPE); + continue; } +#endif + /* Successfully configured locale, so make it the default */ + _coerce_default_locale_settings(config, target); + return; } } } @@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config) the locale's charset without having to switch locales. */ _Py_SetLocaleFromEnv(LC_CTYPE); - _emit_stderr_warning_for_legacy_locale(); + _emit_stderr_warning_for_legacy_locale(core_config); #endif err = _Py_HashRandomization_Init(core_config); |