From 1588be66d7b0eeebc4614309cd0fc837ff52776a Mon Sep 17 00:00:00 2001 From: xdegaye Date: Sun, 12 Nov 2017 12:45:59 +0100 Subject: bpo-28180: Fix the implementation of PEP 538 on Android (GH-4334) --- Include/pylifecycle.h | 1 + Lib/test/test_c_locale_coercion.py | 21 +++++- .../2017-11-12-11-44-22.bpo-28180.HQX000.rst | 4 ++ Modules/readline.c | 2 +- Programs/python.c | 9 +-- Python/pylifecycle.c | 77 ++++++++++++++++++---- 6 files changed, 90 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index 8bbce7f..f7286f3 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -137,6 +137,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size); #ifndef Py_LIMITED_API PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void); PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void); +PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category); #endif #ifdef __cplusplus diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index 635c98f..2a22739 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -6,7 +6,6 @@ import os import sys import sysconfig import shutil -import subprocess from collections import namedtuple import test.support @@ -18,9 +17,12 @@ from test.support.script_helper import ( # Set our expectation for the default encoding used in the C locale # for the filesystem encoding and the standard streams -# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII +# While most *nix platforms default to ASCII in the C locale, some use a +# different encoding. if sys.platform.startswith("aix"): C_LOCALE_STREAM_ENCODING = "iso8859-1" +elif test.support.is_android: + C_LOCALE_STREAM_ENCODING = "utf-8" else: C_LOCALE_STREAM_ENCODING = "ascii" @@ -301,6 +303,19 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): # See https://bugs.python.org/issue30672 for discussion if locale_to_set == "POSIX": continue + + # Platforms using UTF-8 in the C locale do not print + # CLI_COERCION_WARNING when all the locale envt variables are + # not set or set to the empty string. + _expected_warnings = expected_warnings + for _env_var in base_var_dict: + if base_var_dict[_env_var]: + break + else: + if (C_LOCALE_STREAM_ENCODING == "utf-8" and + locale_to_set == "" and coerce_c_locale == "warn"): + _expected_warnings = None + with self.subTest(env_var=env_var, nominal_locale=locale_to_set, PYTHONCOERCECLOCALE=coerce_c_locale): @@ -312,7 +327,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): self._check_child_encoding_details(var_dict, fs_encoding, stream_encoding, - expected_warnings, + _expected_warnings, coercion_expected) def test_test_PYTHONCOERCECLOCALE_not_set(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst b/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst new file mode 100644 index 0000000..edf4581 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst @@ -0,0 +1,4 @@ +A new internal ``_Py_SetLocaleFromEnv(category)`` helper function has been +added in order to improve the consistency of behaviour across different +``libc`` implementations (e.g. Android doesn't support setting the locale from +the environment by default). diff --git a/Modules/readline.c b/Modules/readline.c index 951bc82..d0e3b91 100644 --- a/Modules/readline.c +++ b/Modules/readline.c @@ -1245,7 +1245,7 @@ call_readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt) char *saved_locale = strdup(setlocale(LC_CTYPE, NULL)); if (!saved_locale) Py_FatalError("not enough memory to save locale"); - setlocale(LC_CTYPE, ""); + _Py_SetLocaleFromEnv(LC_CTYPE); #endif if (sys_stdin != rl_instream || sys_stdout != rl_outstream) { diff --git a/Programs/python.c b/Programs/python.c index 4f6b919..270a11b 100644 --- a/Programs/python.c +++ b/Programs/python.c @@ -54,15 +54,8 @@ main(int argc, char **argv) return 1; } -#ifdef __ANDROID__ - /* Passing "" to setlocale() on Android requests the C locale rather - * than checking environment variables, so request C.UTF-8 explicitly - */ - setlocale(LC_ALL, "C.UTF-8"); -#else /* Reconfigure the locale to the default for this process */ - setlocale(LC_ALL, ""); -#endif + _Py_SetLocaleFromEnv(LC_ALL); /* The legacy C locale assumes ASCII as the default text encoding, which * causes problems not only for the CPython runtime, but also other diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 4e8bbb6..8817be1 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -459,7 +459,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) const char *newloc = target->locale_name; /* Reset locale back to currently configured defaults */ - setlocale(LC_ALL, ""); + _Py_SetLocaleFromEnv(LC_ALL); /* Set the relevant locale environment variable */ if (setenv("LC_CTYPE", newloc, 1)) { @@ -472,7 +472,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) } /* Reconfigure with the overridden environment variables */ - setlocale(LC_ALL, ""); + _Py_SetLocaleFromEnv(LC_ALL); } #endif @@ -503,13 +503,14 @@ _Py_CoerceLegacyLocale(void) const char *new_locale = setlocale(LC_CTYPE, target->locale_name); if (new_locale != NULL) { -#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) +#if !defined(__APPLE__) && !defined(__ANDROID__) && \ + defined(HAVE_LANGINFO_H) && defined(CODESET) /* Also ensure that nl_langinfo works in this locale */ char *codeset = nl_langinfo(CODESET); if (!codeset || *codeset == '\0') { /* CODESET is not set or empty, so skip coercion */ new_locale = NULL; - setlocale(LC_CTYPE, ""); + _Py_SetLocaleFromEnv(LC_CTYPE); continue; } #endif @@ -524,6 +525,65 @@ _Py_CoerceLegacyLocale(void) #endif } +/* _Py_SetLocaleFromEnv() is a wrapper around setlocale(category, "") to + * isolate the idiosyncrasies of different libc implementations. It reads the + * appropriate environment variable and uses its value to select the locale for + * 'category'. */ +char * +_Py_SetLocaleFromEnv(int category) +{ +#ifdef __ANDROID__ + const char *locale; + const char **pvar; +#ifdef PY_COERCE_C_LOCALE + const char *coerce_c_locale; +#endif + const char *utf8_locale = "C.UTF-8"; + const char *env_var_set[] = { + "LC_ALL", + "LC_CTYPE", + "LANG", + NULL, + }; + + /* Android setlocale(category, "") doesn't check the environment variables + * and incorrectly sets the "C" locale at API 24 and older APIs. We only + * check the environment variables listed in env_var_set. */ + for (pvar=env_var_set; *pvar; pvar++) { + locale = getenv(*pvar); + if (locale != NULL && *locale != '\0') { + if (strcmp(locale, utf8_locale) == 0 || + strcmp(locale, "en_US.UTF-8") == 0) { + return setlocale(category, utf8_locale); + } + return setlocale(category, "C"); + } + } + + /* Android uses UTF-8, so explicitly set the locale to C.UTF-8 if none of + * LC_ALL, LC_CTYPE, or LANG is set to a non-empty string. + * Quote from POSIX section "8.2 Internationalization Variables": + * "4. If the LANG environment variable is not set or is set to the empty + * string, the implementation-defined default locale shall be used." */ + +#ifdef PY_COERCE_C_LOCALE + coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); + if (coerce_c_locale == NULL || strcmp(coerce_c_locale, "0") != 0) { + /* Some other ported code may check the environment variables (e.g. in + * extension modules), so we make sure that they match the locale + * configuration */ + if (setenv("LC_CTYPE", utf8_locale, 1)) { + fprintf(stderr, "Warning: failed setting the LC_CTYPE " + "environment variable to %s\n", utf8_locale); + } + } +#endif + return setlocale(category, utf8_locale); +#else /* __ANDROID__ */ + return setlocale(category, ""); +#endif /* __ANDROID__ */ +} + /* Global initializations. Can be undone by Py_Finalize(). Don't call this twice without an intervening Py_Finalize() call. @@ -599,20 +659,13 @@ void _Py_InitializeCore(const _PyCoreConfig *config) exit(1); } -#ifdef __ANDROID__ - /* Passing "" to setlocale() on Android requests the C locale rather - * than checking environment variables, so request C.UTF-8 explicitly - */ - setlocale(LC_CTYPE, "C.UTF-8"); -#else #ifndef MS_WINDOWS /* Set up the LC_CTYPE locale, so we can obtain the locale's charset without having to switch locales. */ - setlocale(LC_CTYPE, ""); + _Py_SetLocaleFromEnv(LC_CTYPE); _emit_stderr_warning_for_legacy_locale(); #endif -#endif if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') set_flag(&Py_DebugFlag, p); -- cgit v0.12