summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Include/pylifecycle.h 1
-rw-r--r-- Lib/test/test_c_locale_coercion.py 21
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst 4
-rw-r--r-- Modules/readline.c 2
-rw-r--r-- Programs/python.c 9
-rw-r--r-- Python/pylifecycle.c 77
6 files changed, 90 insertions, 24 deletions
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index 8bbce7f..f7286f3 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -137,6 +137,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void);
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
+PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
#endif
#ifdef __cplusplus
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py
index 635c98f..2a22739 100644
--- a/Lib/test/test_c_locale_coercion.py
+++ b/Lib/test/test_c_locale_coercion.py
@@ -6,7 +6,6 @@ import os
import sys
import sysconfig
import shutil
-import subprocess
from collections import namedtuple
import test.support
@@ -18,9 +17,12 @@ from test.support.script_helper import (
# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams
-# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
+# While most *nix platforms default to ASCII in the C locale, some use a
+# different encoding.
if sys.platform.startswith("aix"):
C_LOCALE_STREAM_ENCODING = "iso8859-1"
+elif test.support.is_android:
+ C_LOCALE_STREAM_ENCODING = "utf-8"
else:
C_LOCALE_STREAM_ENCODING = "ascii"
@@ -301,6 +303,19 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
# See https://bugs.python.org/issue30672 for discussion
if locale_to_set == "POSIX":
continue
+
+ # Platforms using UTF-8 in the C locale do not print
+ # CLI_COERCION_WARNING when all the locale environment variables
+ # are unset or set to the empty string.
+ _expected_warnings = expected_warnings
+ for _env_var in base_var_dict:
+ if base_var_dict[_env_var]:
+ break
+ else:
+ if (C_LOCALE_STREAM_ENCODING == "utf-8" and
+ locale_to_set == "" and coerce_c_locale == "warn"):
+ _expected_warnings = None
+
with self.subTest(env_var=env_var,
nominal_locale=locale_to_set,
PYTHONCOERCECLOCALE=coerce_c_locale):
@@ -312,7 +327,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
self._check_child_encoding_details(var_dict,
fs_encoding,
stream_encoding,
- expected_warnings,
+ _expected_warnings,
coercion_expected)
def test_test_PYTHONCOERCECLOCALE_not_set(self):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst b/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst
new file mode 100644
index 0000000..edf4581
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-11-12-11-44-22.bpo-28180.HQX000.rst
@@ -0,0 +1,4 @@
+A new internal ``_Py_SetLocaleFromEnv(category)`` helper function has been
+added in order to improve the consistency of behaviour across different
+``libc`` implementations (e.g. Android doesn't support setting the locale from
+the environment by default).
diff --git a/Modules/readline.c b/Modules/readline.c
index 951bc82..d0e3b91 100644
--- a/Modules/readline.c
+++ b/Modules/readline.c
@@ -1245,7 +1245,7 @@ call_readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
char *saved_locale = strdup(setlocale(LC_CTYPE, NULL));
if (!saved_locale)
Py_FatalError("not enough memory to save locale");
- setlocale(LC_CTYPE, "");
+ _Py_SetLocaleFromEnv(LC_CTYPE);
#endif
if (sys_stdin != rl_instream || sys_stdout != rl_outstream) {
diff --git a/Programs/python.c b/Programs/python.c
index 4f6b919..270a11b 100644
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -54,15 +54,8 @@ main(int argc, char **argv)
return 1;
}
-#ifdef __ANDROID__
- /* Passing "" to setlocale() on Android requests the C locale rather
- * than checking environment variables, so request C.UTF-8 explicitly
- */
- setlocale(LC_ALL, "C.UTF-8");
-#else
/* Reconfigure the locale to the default for this process */
- setlocale(LC_ALL, "");
-#endif
+ _Py_SetLocaleFromEnv(LC_ALL);
/* The legacy C locale assumes ASCII as the default text encoding, which
* causes problems not only for the CPython runtime, but also other
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 4e8bbb6..8817be1 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -459,7 +459,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
const char *newloc = target->locale_name;
/* Reset locale back to currently configured defaults */
- setlocale(LC_ALL, "");
+ _Py_SetLocaleFromEnv(LC_ALL);
/* Set the relevant locale environment variable */
if (setenv("LC_CTYPE", newloc, 1)) {
@@ -472,7 +472,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
}
/* Reconfigure with the overridden environment variables */
- setlocale(LC_ALL, "");
+ _Py_SetLocaleFromEnv(LC_ALL);
}
#endif
@@ -503,13 +503,14 @@ _Py_CoerceLegacyLocale(void)
const char *new_locale = setlocale(LC_CTYPE,
target->locale_name);
if (new_locale != NULL) {
-#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET)
+#if !defined(__APPLE__) && !defined(__ANDROID__) && \
+ defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') {
/* CODESET is not set or empty, so skip coercion */
new_locale = NULL;
- setlocale(LC_CTYPE, "");
+ _Py_SetLocaleFromEnv(LC_CTYPE);
continue;
}
#endif
@@ -524,6 +525,65 @@ _Py_CoerceLegacyLocale(void)
#endif
}
+/* _Py_SetLocaleFromEnv() is a wrapper around setlocale(category, "") to
+ * isolate the idiosyncrasies of different libc implementations. It reads the
+ * appropriate environment variable and uses its value to select the locale for
+ * 'category'.
+ *
+ * On Android the environment is inspected by hand: the first of LC_ALL,
+ * LC_CTYPE and LANG holding a non-empty value decides the outcome. "C.UTF-8"
+ * and "en_US.UTF-8" select "C.UTF-8"; any other value selects "C". When none
+ * of them holds a non-empty value, "C.UTF-8" is selected. On every other
+ * platform this is exactly setlocale(category, "").
+ *
+ * The return value is the one produced by the underlying setlocale() call. */
+char *
+_Py_SetLocaleFromEnv(int category)
+{
+#ifdef __ANDROID__
+ const char *locale;
+ const char **pvar;
+#ifdef PY_COERCE_C_LOCALE
+ const char *coerce_c_locale;
+#endif
+ const char *utf8_locale = "C.UTF-8";
+ const char *env_var_set[] = {
+ "LC_ALL",
+ "LC_CTYPE",
+ "LANG",
+ NULL,
+ };
+
+ /* Android setlocale(category, "") doesn't check the environment variables
+ * and incorrectly sets the "C" locale at API 24 and older APIs. We only
+ * check the environment variables listed in env_var_set.
+ *
+ * The variables are tried in array order and the first one holding a
+ * non-empty value ends the search, regardless of 'category': the function
+ * returns from inside the loop and later variables are not consulted. */
+ for (pvar=env_var_set; *pvar; pvar++) {
+ locale = getenv(*pvar);
+ if (locale != NULL && *locale != '\0') {
+ if (strcmp(locale, utf8_locale) == 0 ||
+ strcmp(locale, "en_US.UTF-8") == 0) {
+ return setlocale(category, utf8_locale);
+ }
+ return setlocale(category, "C");
+ }
+ }
+
+ /* Android uses UTF-8, so explicitly set the locale to C.UTF-8 if none of
+ * LC_ALL, LC_CTYPE, or LANG is set to a non-empty string.
+ * Quote from POSIX section "8.2 Internationalization Variables":
+ * "4. If the LANG environment variable is not set or is set to the empty
+ * string, the implementation-defined default locale shall be used." */
+
+#ifdef PY_COERCE_C_LOCALE
+ coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
+ if (coerce_c_locale == NULL || strcmp(coerce_c_locale, "0") != 0) {
+ /* Some other ported code may check the environment variables (e.g. in
+ * extension modules), so we make sure that they match the locale
+ * configuration.
+ *
+ * This export is skipped only when PYTHONCOERCECLOCALE is exactly "0".
+ * A setenv() failure is reported on stderr and otherwise ignored: the
+ * setlocale() call below still runs. */
+ if (setenv("LC_CTYPE", utf8_locale, 1)) {
+ fprintf(stderr, "Warning: failed setting the LC_CTYPE "
+ "environment variable to %s\n", utf8_locale);
+ }
+ }
+#endif
+ return setlocale(category, utf8_locale);
+#else /* __ANDROID__ */
+ return setlocale(category, "");
+#endif /* __ANDROID__ */
+}
+
/* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call.
@@ -599,20 +659,13 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
exit(1);
}
-#ifdef __ANDROID__
- /* Passing "" to setlocale() on Android requests the C locale rather
- * than checking environment variables, so request C.UTF-8 explicitly
- */
- setlocale(LC_CTYPE, "C.UTF-8");
-#else
#ifndef MS_WINDOWS
/* Set up the LC_CTYPE locale, so we can obtain
the locale's charset without having to switch
locales. */
- setlocale(LC_CTYPE, "");
+ _Py_SetLocaleFromEnv(LC_CTYPE);
_emit_stderr_warning_for_legacy_locale();
#endif
-#endif
if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
set_flag(&Py_DebugFlag, p);