summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@redhat.com> 2018-08-29 09:01:33 (GMT)
committer: GitHub <noreply@github.com> 2018-08-29 09:01:33 (GMT)
commit: 0b9ea4b211b24464c7d38f63e45e51c275c52dcd (patch)
tree: a2935b5f8ae0eaaf627d4536e75cc608df697734
parent: 98c49c6ab239875e35a3c271bc8fabde6c9be804 (diff)
download: cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.zip
cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.tar.gz
cpython-0b9ea4b211b24464c7d38f63e45e51c275c52dcd.tar.bz2
[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
* bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)

Standard streams like sys.stdout now use the "surrogateescape" error handler, instead of "strict", on the POSIX locale (when the C locale is not coerced and the UTF-8 Mode is disabled).

Add tests on sys.stdout.errors with LC_ALL=POSIX.

Fix the error handler of standard streams like sys.stdout: PYTHONIOENCODING=":" is now ignored instead of setting the error handler to "strict".

(cherry picked from commit 315877dc361d554bec34b4b62c270479ad36a1be)
-rw-r--r-- Lib/test/test_sys.py 28
-rw-r--r-- Lib/test/test_utf8_mode.py 6
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst 3
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst 3
-rw-r--r-- Python/pylifecycle.c 46
5 files changed, 60 insertions, 26 deletions
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 336ae44..27f7590 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
expected = None
self.check_fsencoding(fs_encoding, expected)
- def c_locale_get_error_handler(self, isolated=False, encoding=None):
+ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale
env = os.environ.copy()
- env["LC_ALL"] = "C"
+ env["LC_ALL"] = locale
env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join((
'import sys',
@@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
stdout, stderr = p.communicate()
return stdout
- def test_c_locale_surrogateescape(self):
- out = self.c_locale_get_error_handler(isolated=True)
+ def check_locale_surrogateescape(self, locale):
+ out = self.c_locale_get_error_handler(locale, isolated=True)
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
# replace the default error handler
- out = self.c_locale_get_error_handler(encoding=':ignore')
+ out = self.c_locale_get_error_handler(locale, encoding=':ignore')
self.assertEqual(out,
'stdin: ignore\n'
'stdout: ignore\n'
'stderr: backslashreplace\n')
# force the encoding
- out = self.c_locale_get_error_handler(encoding='iso8859-1')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+ out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
self.assertEqual(out,
'stdin: strict\n'
'stdout: strict\n'
'stderr: backslashreplace\n')
# have no any effect
- out = self.c_locale_get_error_handler(encoding=':')
+ out = self.c_locale_get_error_handler(locale, encoding=':')
self.assertEqual(out,
- 'stdin: strict\n'
- 'stdout: strict\n'
+ 'stdin: surrogateescape\n'
+ 'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
- out = self.c_locale_get_error_handler(encoding='')
+ out = self.c_locale_get_error_handler(locale, encoding='')
self.assertEqual(out,
'stdin: surrogateescape\n'
'stdout: surrogateescape\n'
'stderr: backslashreplace\n')
+ def test_c_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('C')
+
+ def test_posix_locale_surrogateescape(self):
+ self.check_locale_surrogateescape('POSIX')
+
def test_implementation(self):
# This test applies to all implementations equally.
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 4a16b73..554abfa 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -146,9 +146,9 @@ class UTF8ModeTests(unittest.TestCase):
out = self.get_output('-X', 'utf8', '-c', code,
PYTHONIOENCODING=":namereplace")
self.assertEqual(out.splitlines(),
- ['stdin: UTF-8/namereplace',
- 'stdout: UTF-8/namereplace',
- 'stderr: UTF-8/backslashreplace'])
+ ['stdin: utf-8/namereplace',
+ 'stdout: utf-8/namereplace',
+ 'stderr: utf-8/backslashreplace'])
def test_io(self):
code = textwrap.dedent('''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
new file mode 100644
index 0000000..5ca373a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
@@ -0,0 +1,3 @@
+Fix the error handler of standard streams like sys.stdout:
+PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
+"strict".
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644
index 0000000..893e4f5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index fc4ee06..539d62a 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -423,13 +423,13 @@ get_default_standard_stream_error_handler(void)
{
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL) {
- /* "surrogateescape" is the default in the legacy C locale */
- if (strcmp(ctype_loc, "C") == 0) {
+ /* surrogateescape is the default in the legacy C and POSIX locales */
+ if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
return "surrogateescape";
}
#ifdef PY_COERCE_C_LOCALE
- /* "surrogateescape" is the default in locale coercion target locales */
+ /* surrogateescape is the default in locale coercion target locales */
const _LocaleCoercionTarget *target = NULL;
for (target = _TARGET_LOCALES; target->locale_name; target++) {
if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -440,7 +440,7 @@ get_default_standard_stream_error_handler(void)
}
/* Otherwise return NULL to request the typical default error handler */
- return NULL;
+ return "strict";
}
#ifdef PY_COERCE_C_LOCALE
@@ -1851,20 +1851,42 @@ init_sys_streams(PyInterpreterState *interp)
if (err) {
*err = '\0';
err++;
- if (*err && !errors) {
- errors = err;
+ if (!err[0]) {
+ err = NULL;
}
}
- if (*pythonioencoding && !encoding) {
- encoding = pythonioencoding;
+
+ /* Does PYTHONIOENCODING contain an encoding? */
+ if (pythonioencoding[0]) {
+ if (!encoding) {
+ encoding = pythonioencoding;
+ }
+
+ /* If the encoding is set but not the error handler,
+ use "strict" error handler by default.
+ PYTHONIOENCODING=latin1 behaves as
+ PYTHONIOENCODING=latin1:strict. */
+ if (!err) {
+ err = "strict";
+ }
+ }
+
+ if (!errors && err != NULL) {
+ errors = err;
}
}
- else if (interp->core_config.utf8_mode) {
- encoding = "utf-8";
- errors = "surrogateescape";
+
+ if (interp->core_config.utf8_mode) {
+ if (!encoding) {
+ encoding = "utf-8";
+ }
+ if (!errors) {
+ errors = "surrogateescape";
+ }
}
- if (!errors && !pythonioencoding) {
+
+ if (!errors) {
/* Choose the default error handler based on the current locale */
errors = get_default_standard_stream_error_handler();
}