summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2017-12-16 03:54:22 (GMT)
committerGitHub <noreply@github.com>2017-12-16 03:54:22 (GMT)
commit9454060e84a669dde63824d9e2fcaf295e34f687 (patch)
tree4c40a6e1bd11aa75819acb7efce4981fc6ba7611 /Python
parente796b2fe26f220107ac50667de6cc86c82b465e3 (diff)
downloadcpython-9454060e84a669dde63824d9e2fcaf295e34f687.zip
cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.gz
cpython-9454060e84a669dde63824d9e2fcaf295e34f687.tar.bz2
bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899)
bpo-29240, bpo-32030: If the encoding changes (C locale coerced or UTF-8 Mode changed), Py_Main() now re-reads the configuration with the new encoding.

Changes:

* Add _Py_UnixMain() called by main().
* Rename pymain_free_pymain() to pymain_clear_pymain(); it can now be called multiple times.
* Rename pymain_parse_cmdline_envvars() to pymain_read_conf().
* Py_Main() now clears orig_argc and orig_argv at exit.
* Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is no longer any need for two copies of the wchar_t** argv.
* _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn.
* Py_UTF8Mode is now initialized to -1.
* Locale coercion (PEP 538) now respects the -I and -E options.
Diffstat (limited to 'Python')
-rw-r--r--Python/bltinmodule.c7
-rw-r--r--Python/fileutils.c4
-rw-r--r--Python/pylifecycle.c80
3 files changed, 34 insertions, 57 deletions
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 604493d..e702f7c 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
int Py_HasFileSystemDefaultEncoding = 0;
#endif
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
-/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin
- and stdout error handler to "surrogateescape". */
-int Py_UTF8Mode = 0;
+/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
+ stdin and stdout error handler to "surrogateescape". It is equal to
+ -1 by default: unknown, will be set by Py_Main() */
+int Py_UTF8Mode = -1;
_Py_IDENTIFIER(__builtins__);
_Py_IDENTIFIER(__dict__);
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 4b69049..c4d495d 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
#else
- if (Py_UTF8Mode) {
+ if (Py_UTF8Mode == 1) {
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
}
@@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeLocaleUTF8(text, error_pos);
#else /* __APPLE__ */
- if (Py_UTF8Mode) {
+ if (Py_UTF8Mode == 1) {
return _Py_EncodeLocaleUTF8(text, error_pos);
}
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c62607..6500995 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING =
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
-static int
-_legacy_locale_warnings_enabled(void)
-{
- const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
- return (coerce_c_locale != NULL &&
- strncmp(coerce_c_locale, "warn", 5) == 0);
-}
-
static void
-_emit_stderr_warning_for_legacy_locale(void)
+_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config)
{
- if (_legacy_locale_warnings_enabled()) {
+ if (core_config->coerce_c_locale_warn) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
@@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void)
}
#ifdef PY_COERCE_C_LOCALE
-static const char _C_LOCALE_COERCION_WARNING[] =
+static const char C_LOCALE_COERCION_WARNING[] =
"Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale "
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
static void
-_coerce_default_locale_settings(const _LocaleCoercionTarget *target)
+_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
{
const char *newloc = target->locale_name;
@@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n");
return;
}
- if (_legacy_locale_warnings_enabled()) {
- fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
+ if (config->coerce_c_locale_warn) {
+ fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */
@@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
#endif
void
-_Py_CoerceLegacyLocale(void)
+_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
{
#ifdef PY_COERCE_C_LOCALE
- /* We ignore the Python -E and -I flags here, as the CLI needs to sort out
- * the locale settings *before* we try to do anything with the command
- * line arguments. For cross-platform debugging purposes, we also need
- * to give end users a way to force even scripts that are otherwise
- * isolated from their environment to use the legacy ASCII-centric C
- * locale.
- *
- * Ignoring -E and -I is safe from a security perspective, as we only use
- * the setting to turn *off* the implicit locale coercion, and anyone with
- * access to the process environment already has the ability to set
- * `LC_ALL=C` to override the C level locale settings anyway.
- */
- const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
- if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
- /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */
- const char *locale_override = getenv("LC_ALL");
- if (locale_override == NULL || *locale_override == '\0') {
- /* LC_ALL is also not set (or is set to an empty string) */
- const _LocaleCoercionTarget *target = NULL;
- for (target = _TARGET_LOCALES; target->locale_name; target++) {
- const char *new_locale = setlocale(LC_CTYPE,
- target->locale_name);
- if (new_locale != NULL) {
+ const char *locale_override = getenv("LC_ALL");
+ if (locale_override == NULL || *locale_override == '\0') {
+ /* LC_ALL is also not set (or is set to an empty string) */
+ const _LocaleCoercionTarget *target = NULL;
+ for (target = _TARGET_LOCALES; target->locale_name; target++) {
+ const char *new_locale = setlocale(LC_CTYPE,
+ target->locale_name);
+ if (new_locale != NULL) {
#if !defined(__APPLE__) && !defined(__ANDROID__) && \
- defined(HAVE_LANGINFO_H) && defined(CODESET)
- /* Also ensure that nl_langinfo works in this locale */
- char *codeset = nl_langinfo(CODESET);
- if (!codeset || *codeset == '\0') {
- /* CODESET is not set or empty, so skip coercion */
- new_locale = NULL;
- _Py_SetLocaleFromEnv(LC_CTYPE);
- continue;
- }
-#endif
- /* Successfully configured locale, so make it the default */
- _coerce_default_locale_settings(target);
- return;
+defined(HAVE_LANGINFO_H) && defined(CODESET)
+ /* Also ensure that nl_langinfo works in this locale */
+ char *codeset = nl_langinfo(CODESET);
+ if (!codeset || *codeset == '\0') {
+ /* CODESET is not set or empty, so skip coercion */
+ new_locale = NULL;
+ _Py_SetLocaleFromEnv(LC_CTYPE);
+ continue;
}
+#endif
+ /* Successfully configured locale, so make it the default */
+ _coerce_default_locale_settings(config, target);
+ return;
}
}
}
@@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config)
the locale's charset without having to switch
locales. */
_Py_SetLocaleFromEnv(LC_CTYPE);
- _emit_stderr_warning_for_legacy_locale();
+ _emit_stderr_warning_for_legacy_locale(core_config);
#endif
err = _Py_HashRandomization_Init(core_config);