summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com>, 2018-08-29 07:58:12 (GMT)
committer: GitHub <noreply@github.com>, 2018-08-29 07:58:12 (GMT)
commit: 315877dc361d554bec34b4b62c270479ad36a1be (patch)
tree: 0783fe0301841be7a065316dfc442c64f90a3a4f
parent: 21786f5186383e8912e761eccd0f4ac1cca83217 (diff)
download: cpython-315877dc361d554bec34b4b62c270479ad36a1be.zip
cpython-315877dc361d554bec34b4b62c270479ad36a1be.tar.gz
cpython-315877dc361d554bec34b4b62c270479ad36a1be.tar.bz2
bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)
Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled). Add tests on sys.stdout.errors with LC_ALL=POSIX.
-rw-r--r-- Lib/test/test_sys.py | 24
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst | 3
-rw-r--r-- Python/pylifecycle.c | 31
3 files changed, 40 insertions, 18 deletions
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 005c82d..f3dd3bb 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
expected = None
self.check_fsencoding(fs_encoding, expected)
- def c_locale_get_error_handler(self, isolated=False, encoding=None):
+ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale
env = os.environ.copy()
- env["LC_ALL"] = "C"
+ env["LC_ALL"] = locale
env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join((
'import sys',
@@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
stdout, stderr = p.communicate()
return stdout
- def test_c_locale_surrogateescape(self):
- out = self.c_locale_get_error_handler(isolated=True)
+ def check_locale_surrogateescape(self, locale):
+ out = self.c_locale_get_error_handler(locale, isolated=True)
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
# replace the default error handler
- out = self.c_locale_get_error_handler(encoding=':ignore')
+ out = self.c_locale_get_error_handler(locale, encoding=':ignore')
self.assertEqual(out,
'stdin: ignore\n'
'stdout: ignore\n'
'stderr: backslashreplace\n')
# force the encoding
- out = self.c_locale_get_error_handler(encoding='iso8859-1')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
# have no any effect
- out = self.c_locale_get_error_handler(encoding=':')
+ out = self.c_locale_get_error_handler(locale, encoding=':')
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='')
+ out = self.c_locale_get_error_handler(locale, encoding='')
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
+ def test_c_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('C')
+
+ def test_posix_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('POSIX')
+
def test_implementation(self):
# This test applies to all implementations equally.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644
index 0000000..893e4f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 8c77859..33af06e 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -345,13 +345,13 @@ get_stdio_errors(void)
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
- /* "surrogateescape" is the default in the legacy C locale */
- if (strcmp(ctype_loc, "C") == 0) {
+ /* surrogateescape is the default in the legacy C and POSIX locales */
+ if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
return "surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
- /* "surrogateescape" is the default in locale coercion target locales */
+ /* surrogateescape is the default in locale coercion target locales */
const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) {
if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -1791,16 +1791,29 @@ init_sys_streams(PyInterpreterState *interp)
if (err) {
*err = '\0';
err++;
- if (*err && !errors) {
- errors = err;
+ if (!err[0]) {
+ err = NULL;
}
}
- if (!encoding && *pythonioencoding) {
- encoding = pythonioencoding;
- if (!errors) {
- errors = "strict";
+
+ /* Does PYTHONIOENCODING contain an encoding? */
+ if (pythonioencoding[0]) {
+ if (!encoding) {
+ encoding = pythonioencoding;
+ }
+
+ /* If the encoding is set but not the error handler,
+ use "strict" error handler by default.
+ PYTHONIOENCODING=latin1 behaves as
+ PYTHONIOENCODING=latin1:strict. */
+ if (!err) {
+ err = "strict";
}
}
+
+ if (!errors && err != NULL) {
+ errors = err;
+ }
}
if (interp->core_config.utf8_mode) {