diff options
author | Nick Coghlan <ncoghlan@gmail.com> | 2017-06-29 14:48:14 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-29 14:48:14 (GMT) |
commit | 18974c35ad9d25ffea041dc0363dc01889f4a595 (patch) | |
tree | abdacbf17346c452417371bbe8503b7e8e500102 | |
parent | f7d090c165f6cd3d008fe60c78e5324caef53f80 (diff) | |
download | cpython-18974c35ad9d25ffea041dc0363dc01889f4a595.zip cpython-18974c35ad9d25ffea041dc0363dc01889f4a595.tar.gz cpython-18974c35ad9d25ffea041dc0363dc01889f4a595.tar.bz2 |
bpo-30647: Check nl_langinfo(CODESET) in locale coercion (GH-2374)
- On some versions of FreeBSD, setting the "UTF-8" locale
  succeeds, but a subsequent "nl_langinfo(CODESET)" call fails.
- Adding a check for this in the coercion logic means that
  coercion will happen on systems where this check succeeds,
  and will be skipped otherwise.
- That way, CPython should automatically adapt to changes in
  platform behaviour, rather than needing a new release to
  enable coercion at build time.
- This also allows UTF-8 to be re-enabled as a coercion
  target, restoring the locale coercion behaviour on Mac OS X.
-rw-r--r-- | Lib/test/test_c_locale_coercion.py | 25 | ||||
-rw-r--r-- | Python/pylifecycle.c | 19 |
2 files changed, 29 insertions, 15 deletions
diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index a4b4626..f5a9fe3 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -1,6 +1,7 @@ # Tests the attempted automatic coercion of the C locale to a UTF-8 locale import unittest +import locale import os import sys import sysconfig @@ -32,24 +33,34 @@ else: # In order to get the warning messages to match up as expected, the candidate # order here must match the target locale order in Python/pylifecycle.c -_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") - -# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to -# problems encountered on *BSD systems with those test cases -# For additional details see: -# nl_langinfo CODESET error: https://bugs.python.org/issue30647 -# locale handling differences: https://bugs.python.org/issue30672 +_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") # There's no reliable cross-platform way of checking locale alias # lists, so the only way of knowing which of these locales will work # is to try them with locale.setlocale(). We do that in a subprocess # to avoid altering the locale of the test runner. 
+# +# If the relevant locale module attributes exist, and we're not on a platform +# where we expect it to always succeed, we also check that +# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter +# will skip locale coercion for that particular target locale +_check_nl_langinfo_CODESET = bool( + sys.platform not in ("darwin", "linux") and + hasattr(locale, "nl_langinfo") and + hasattr(locale, "CODESET") +) + def _set_locale_in_subprocess(locale_name): cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" + if _check_nl_langinfo_CODESET: + # If there's no valid CODESET, we expect coercion to be skipped + cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" cmd = cmd_fmt.format(locale_name) result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) return result.rc == 0 + + _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" _EncodingDetails = namedtuple("EncodingDetails", _fields) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 953bc90..3953fec 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -399,17 +399,10 @@ typedef struct _CandidateLocale { static _LocaleCoercionTarget _TARGET_LOCALES[] = { {"C.UTF-8"}, {"C.utf8"}, - /* {"UTF-8"}, */ + {"UTF-8"}, {NULL} }; -/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to - * problems encountered on *BSD systems with those test cases - * For additional details see: - * nl_langinfo CODESET error: https://bugs.python.org/issue30647 - * locale handling differences: https://bugs.python.org/issue30672 - */ - static char * get_default_standard_stream_error_handler(void) { @@ -490,6 +483,16 @@ _Py_CoerceLegacyLocale(void) const char *new_locale = setlocale(LC_CTYPE, target->locale_name); if (new_locale != NULL) { +#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) + /* Also ensure that nl_langinfo works in this locale */ + char *codeset = 
nl_langinfo(CODESET); + if (!codeset || *codeset == '\0') { + /* CODESET is not set or empty, so skip coercion */ + new_locale = NULL; + setlocale(LC_CTYPE, ""); + continue; + } +#endif /* Successfully configured locale, so make it the default */ _coerce_default_locale_settings(target); return; |