diff options
author | Victor Stinner <vstinner@redhat.com> | 2018-08-29 09:01:33 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-29 09:01:33 (GMT) |
commit | 0b9ea4b211b24464c7d38f63e45e51c275c52dcd (patch) | |
tree | a2935b5f8ae0eaaf627d4536e75cc608df697734 | |
parent | 98c49c6ab239875e35a3c271bc8fabde6c9be804 (diff) | |
download | cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.zip cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.tar.gz cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.tar.bz2 |
[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
* bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
Standard streams like sys.stdout now use the "surrogateescape" error
handler, instead of "strict", on the POSIX locale (when the C locale is not
coerced and the UTF-8 Mode is disabled).
Add tests on sys.stdout.errors with LC_ALL=POSIX.
Fix the error handler of standard streams like sys.stdout:
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
"strict".
(cherry picked from commit 315877dc361d554bec34b4b62c270479ad36a1be)
-rw-r--r-- | Lib/test/test_sys.py | 28 | ||||
-rw-r--r-- | Lib/test/test_utf8_mode.py | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst | 3 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst | 3 | ||||
-rw-r--r-- | Python/pylifecycle.c | 46 |
5 files changed, 60 insertions, 26 deletions
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 336ae44..27f7590 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase): expected = None self.check_fsencoding(fs_encoding, expected) - def c_locale_get_error_handler(self, isolated=False, encoding=None): + def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): # Force the POSIX locale env = os.environ.copy() - env["LC_ALL"] = "C" + env["LC_ALL"] = locale env["PYTHONCOERCECLOCALE"] = "0" code = '\n'.join(( 'import sys', @@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase): stdout, stderr = p.communicate() return stdout - def test_c_locale_surrogateescape(self): - out = self.c_locale_get_error_handler(isolated=True) + def check_locale_surrogateescape(self, locale): + out = self.c_locale_get_error_handler(locale, isolated=True) self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') # replace the default error handler - out = self.c_locale_get_error_handler(encoding=':ignore') + out = self.c_locale_get_error_handler(locale, encoding=':ignore') self.assertEqual(out, 'stdin: ignore\n' 'stdout: ignore\n' 'stderr: backslashreplace\n') # force the encoding - out = self.c_locale_get_error_handler(encoding='iso8859-1') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='iso8859-1:') + out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:') self.assertEqual(out, 'stdin: strict\n' 'stdout: strict\n' 'stderr: backslashreplace\n') # have no any effect - out = self.c_locale_get_error_handler(encoding=':') + out = self.c_locale_get_error_handler(locale, encoding=':') self.assertEqual(out, - 'stdin: strict\n' - 'stdout: strict\n' + 'stdin: surrogateescape\n' + 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') - out = self.c_locale_get_error_handler(encoding='') + out = self.c_locale_get_error_handler(locale, encoding='') self.assertEqual(out, 'stdin: surrogateescape\n' 'stdout: surrogateescape\n' 'stderr: backslashreplace\n') + def test_c_locale_surrogateescape(self): + self.check_locale_surrogateescape('C') + + def test_posix_locale_surrogateescape(self): + self.check_locale_surrogateescape('POSIX') + def test_implementation(self): # This test applies to all implementations equally. diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 4a16b73..554abfa 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -146,9 +146,9 @@ class UTF8ModeTests(unittest.TestCase): out = self.get_output('-X', 'utf8', '-c', code, PYTHONIOENCODING=":namereplace") self.assertEqual(out.splitlines(), - ['stdin: UTF-8/namereplace', - 'stdout: UTF-8/namereplace', - 'stderr: UTF-8/backslashreplace']) + ['stdin: utf-8/namereplace', + 'stdout: utf-8/namereplace', + 'stderr: utf-8/backslashreplace']) def test_io(self): code = textwrap.dedent(''' diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst new file mode 100644 index 0000000..5ca373a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst @@ -0,0 +1,3 @@ +Fix the error handler of standard streams like sys.stdout: +PYTHONIOENCODING=":" is now ignored instead of setting the error handler to +"strict". diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst new file mode 100644 index 0000000..893e4f5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst @@ -0,0 +1,3 @@ +Standard streams like sys.stdout now use the "surrogateescape" error +handler, instead of "strict", on the POSIX locale (when the C locale is not +coerced and the UTF-8 Mode is disabled). diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index fc4ee06..539d62a 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -423,13 +423,13 @@ get_default_standard_stream_error_handler(void) { const char *ctype_loc = setlocale(LC_CTYPE, NULL); if (ctype_loc != NULL) { - /* "surrogateescape" is the default in the legacy C locale */ - if (strcmp(ctype_loc, "C") == 0) { + /* surrogateescape is the default in the legacy C and POSIX locales */ + if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) { return "surrogateescape"; } #ifdef PY_COERCE_C_LOCALE - /* "surrogateescape" is the default in locale coercion target locales */ + /* surrogateescape is the default in locale coercion target locales */ const _LocaleCoercionTarget *target = NULL; for (target = _TARGET_LOCALES; target->locale_name; target++) { if (strcmp(ctype_loc, target->locale_name) == 0) { @@ -440,7 +440,7 @@ get_default_standard_stream_error_handler(void) } /* Otherwise return NULL to request the typical default error handler */ - return NULL; + return "strict"; } #ifdef PY_COERCE_C_LOCALE @@ -1851,20 +1851,42 @@ init_sys_streams(PyInterpreterState *interp) if (err) { *err = '\0'; err++; - if (*err && !errors) { - errors = err; + if (!err[0]) { + err = NULL; } } - if (*pythonioencoding && !encoding) { - encoding = pythonioencoding; + + /* Does PYTHONIOENCODING contain an encoding? */ + if (pythonioencoding[0]) { + if (!encoding) { + encoding = pythonioencoding; + } + + /* If the encoding is set but not the error handler, + use "strict" error handler by default. + PYTHONIOENCODING=latin1 behaves as + PYTHONIOENCODING=latin1:strict. */ + if (!err) { + err = "strict"; + } + } + + if (!errors && err != NULL) { + errors = err; } } - else if (interp->core_config.utf8_mode) { - encoding = "utf-8"; - errors = "surrogateescape"; + + if (interp->core_config.utf8_mode) { + if (!encoding) { + encoding = "utf-8"; + } + if (!errors) { + errors = "surrogateescape"; + } } - if (!errors && !pythonioencoding) { + + if (!errors) { /* Choose the default error handler based on the current locale */ errors = get_default_standard_stream_error_handler(); } |