summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/whatsnew/3.5.rst5
-rw-r--r--Lib/test/test_sys.py44
-rw-r--r--Misc/NEWS4
-rw-r--r--Python/pythonrun.c11
4 files changed, 62 insertions, 2 deletions
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
index 176160b..2c044ae 100644
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -79,7 +79,10 @@ New built-in features:
Implementation improvements:
-* None yet.
+* When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale),
+ :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the
+ ``surrogateescape`` error handler, instead of the ``strict`` error handler
+ (:issue:`19977`).
Significantly Improved Library Modules:
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 5a9699f..f3d0b42 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -615,6 +615,50 @@ class SysModuleTest(unittest.TestCase):
expected = None
self.check_fsencoding(fs_encoding, expected)
+ @unittest.skipIf(sys.platform == 'win32',
+ 'test specific to UNIX')
+ def test_c_locale_surrogateescape(self):
+ # Force the POSIX locale
+ env = os.environ.copy()
+ env["LC_ALL"] = "C"
+ code = '\n'.join((
+ 'import codecs, sys',
+ 'def dump(name):',
+ ' std = getattr(sys, name)',
+ ' encoding = codecs.lookup(std.encoding).name',
+ ' print("%s: %s:%s" % (name, encoding, std.errors))',
+ 'dump("stdin")',
+ 'dump("stdout")',
+ 'dump("stderr")',
+ ))
+ p = subprocess.Popen([sys.executable, "-I", "-c", code],
+ stdout=subprocess.PIPE, env=env)
+ out = p.communicate()[0]
+ self.assertEqual(out,
+ b'stdin: ascii:surrogateescape\n'
+ b'stdout: ascii:surrogateescape\n'
+ b'stderr: ascii:backslashreplace\n')
+
+ # replace the default error handler
+ env['PYTHONIOENCODING'] = ':strict'
+ p = subprocess.Popen([sys.executable, "-c", code],
+ stdout=subprocess.PIPE, env=env)
+ out = p.communicate()[0]
+ self.assertEqual(out,
+ b'stdin: ascii:strict\n'
+ b'stdout: ascii:strict\n'
+ b'stderr: ascii:backslashreplace\n')
+
+ # force the encoding
+ env['PYTHONIOENCODING'] = 'iso8859-1'
+ p = subprocess.Popen([sys.executable, "-c", code],
+ stdout=subprocess.PIPE, env=env)
+ out = p.communicate()[0]
+ self.assertEqual(out,
+ b'stdin: iso8859-1:surrogateescape\n'
+ b'stdout: iso8859-1:surrogateescape\n'
+ b'stderr: iso8859-1:backslashreplace\n')
+
def test_implementation(self):
# This test applies to all implementations equally.
diff --git a/Misc/NEWS b/Misc/NEWS
index 5946bc9..2072204 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@ Core and Builtins
Library
-------
+- Issue #19977: When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale),
+ :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the
+ ``surrogateescape`` error handler, instead of the ``strict`` error handler.
+
- Issue #20574: Implement incremental decoder for cp65001 code (Windows code
page 65001, Microsoft UTF-8).
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index e9947e9..bb9f425 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -1156,6 +1156,15 @@ initstdio(void)
encoding = _Py_StandardStreamEncoding;
errors = _Py_StandardStreamErrors;
if (!encoding || !errors) {
+ if (!errors) {
+ /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
+ stdin and stdout use the surrogateescape error handler by
+ default, instead of the strict error handler. */
+ char *loc = setlocale(LC_CTYPE, NULL);
+ if (loc != NULL && strcmp(loc, "C") == 0)
+ errors = "surrogateescape";
+ }
+
pythonioencoding = Py_GETENV("PYTHONIOENCODING");
if (pythonioencoding) {
char *err;
@@ -1168,7 +1177,7 @@ initstdio(void)
if (err) {
*err = '\0';
err++;
- if (*err && !errors) {
+ if (*err && !_Py_StandardStreamErrors) {
errors = err;
}
}