diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-28 10:35:44 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-28 10:35:44 (GMT) |
commit | 5cb258950ce9b69b1f65646431c464c0c17b1510 (patch) | |
tree | 3f63a61cad2e65c83ab5f874a13a44def04bc182 | |
parent | d658deac6060ee92b449a3bf424b460eafd99f3e (diff) | |
download | cpython-5cb258950ce9b69b1f65646431c464c0c17b1510.zip cpython-5cb258950ce9b69b1f65646431c464c0c17b1510.tar.gz cpython-5cb258950ce9b69b1f65646431c464c0c17b1510.tar.bz2 |
bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972)
* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() checks also that the command line
arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
with POSIX locale or C locale.
-rw-r--r-- | Lib/test/test_utf8_mode.py | 23 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst | 2 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst | 3 | ||||
-rw-r--r-- | Python/coreconfig.c | 16 | ||||
-rw-r--r-- | Python/fileutils.c | 2 |
5 files changed, 35 insertions, 11 deletions
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 3e918fd..df988c1 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -12,6 +12,7 @@ from test.support.script_helper import assert_python_ok, assert_python_failure MS_WINDOWS = (sys.platform == 'win32') +POSIX_LOCALES = ('C', 'POSIX') class UTF8ModeTests(unittest.TestCase): @@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase): def posix_locale(self): loc = locale.setlocale(locale.LC_CTYPE, None) - return (loc == 'C') + return (loc in POSIX_LOCALES) def get_output(self, *args, failure=False, **kw): kw = dict(self.DEFAULT_ENV, **kw) @@ -39,8 +40,10 @@ class UTF8ModeTests(unittest.TestCase): def test_posix_locale(self): code = 'import sys; print(sys.flags.utf8_mode)' - out = self.get_output('-c', code, LC_ALL='C') - self.assertEqual(out, '1') + for loc in POSIX_LOCALES: + with self.subTest(LC_ALL=loc): + out = self.get_output('-c', code, LC_ALL=loc) + self.assertEqual(out, '1') def test_xoption(self): code = 'import sys; print(sys.flags.utf8_mode)' @@ -201,8 +204,10 @@ class UTF8ModeTests(unittest.TestCase): out = self.get_output('-X', 'utf8', '-c', code) self.assertEqual(out, 'UTF-8 UTF-8') - out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C') - self.assertEqual(out, 'UTF-8 UTF-8') + for loc in POSIX_LOCALES: + with self.subTest(LC_ALL=loc): + out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc) + self.assertEqual(out, 'UTF-8 UTF-8') @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') def test_cmd_line(self): @@ -217,13 +222,19 @@ class UTF8ModeTests(unittest.TestCase): self.assertEqual(args, ascii(expected), out) check('utf8', [arg_utf8]) + for loc in POSIX_LOCALES: + with self.subTest(LC_ALL=loc): + check('utf8', [arg_utf8], LC_ALL=loc) + if sys.platform == 'darwin' or support.is_android: c_arg = arg_utf8 elif sys.platform.startswith("aix"): c_arg = arg.decode('iso-8859-1') else: c_arg = arg_ascii - check('utf8=0', [c_arg], LC_ALL='C') + for loc in 
POSIX_LOCALES: + with self.subTest(LC_ALL=loc): + check('utf8=0', [c_arg], LC_ALL=loc) def test_optim_level(self): # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst new file mode 100644 index 0000000..280a892 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst @@ -0,0 +1,2 @@ +The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C" +locale. diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst new file mode 100644 index 0000000..9fce794 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst @@ -0,0 +1,3 @@ +On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also forces the +ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE +locale is "C". 
diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 1b9e26e..acf4645 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -1,5 +1,6 @@ #include "Python.h" #include "internal/pystate.h" +#include <locale.h> #define DECODE_LOCALE_ERR(NAME, LEN) \ @@ -828,14 +829,21 @@ static void config_init_locale(_PyCoreConfig *config) { if (_Py_LegacyLocaleDetected()) { - /* POSIX locale: enable C locale coercion and UTF-8 Mode */ - if (config->utf8_mode < 0) { - config->utf8_mode = 1; - } + /* The C locale enables the C locale coercion (PEP 538) */ if (config->coerce_c_locale < 0) { config->coerce_c_locale = 1; } } +#ifndef MS_WINDOWS + const char *ctype_loc = setlocale(LC_CTYPE, NULL); + if (ctype_loc != NULL + && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) { + /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ + if (config->utf8_mode < 0) { + config->utf8_mode = 1; + } + } +#endif } diff --git a/Python/fileutils.c b/Python/fileutils.c index 35869c8..b413f4e 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -128,7 +128,7 @@ check_force_ascii(void) loc = setlocale(LC_CTYPE, NULL); if (loc == NULL) goto error; - if (strcmp(loc, "C") != 0) { + if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) { /* the LC_CTYPE locale is different than C */ return 0; } |