summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2012-06-05 11:43:22 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2012-06-05 11:43:22 (GMT)
commit: f86a5e8a93ab293d4cc00a8f2835d6d2cd3baa69 (patch)
tree: 9d249e4c06f25885dc668edd529b1a4c802c898a
parent: 91c5a34613fb918c79bb372723e10e106ad9a9be (diff)
download: cpython-f86a5e8a93ab293d4cc00a8f2835d6d2cd3baa69.zip
cpython-f86a5e8a93ab293d4cc00a8f2835d6d2cd3baa69.tar.gz
cpython-f86a5e8a93ab293d4cc00a8f2835d6d2cd3baa69.tar.bz2
Close #11022: TextIOWrapper doesn't call locale.setlocale() anymore
open() and io.TextIOWrapper now call locale.getpreferredencoding(False) instead of locale.getpreferredencoding() in text mode if the encoding is not specified. The locale encoding is no longer changed temporarily using locale.setlocale(); the current locale encoding is used instead of the user preferred encoding. Also explain in the open() documentation that locale.getpreferredencoding(False) is called if the encoding is not specified.
-rw-r--r-- Doc/library/functions.rst 7
-rw-r--r-- Doc/library/io.rst 8
-rw-r--r-- Lib/_pyio.py 4
-rw-r--r-- Lib/test/test_builtin.py 40
-rw-r--r-- Lib/test/test_io.py 35
-rw-r--r-- Misc/NEWS 6
-rw-r--r-- Modules/_io/_iomodule.c 5
-rw-r--r-- Modules/_io/textio.c 4
8 files changed, 82 insertions, 27 deletions
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index d5ac23e..5254299 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -800,9 +800,10 @@ are always available. They are listed here in alphabetical order.
already exists), ``'x'`` for exclusive creation and ``'a'`` for appending
(which on *some* Unix systems, means that *all* writes append to the end of
the file regardless of the current seek position). In text mode, if
- *encoding* is not specified the encoding used is platform dependent. (For
- reading and writing raw bytes use binary mode and leave *encoding*
- unspecified.) The available modes are:
+ *encoding* is not specified the encoding used is platform dependent:
+ ``locale.getpreferredencoding(False)`` is called to get the current locale
+ encoding. (For reading and writing raw bytes use binary mode and leave
+ *encoding* unspecified.) The available modes are:
========= ===============================================================
Character Meaning
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 4d564bb..e30a016 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -752,7 +752,7 @@ Text I/O
It inherits :class:`TextIOBase`.
*encoding* gives the name of the encoding that the stream will be decoded or
- encoded with. It defaults to :func:`locale.getpreferredencoding`.
+ encoded with. It defaults to ``locale.getpreferredencoding(False)``.
*errors* is an optional string that specifies how encoding and decoding
errors are to be handled. Pass ``'strict'`` to raise a :exc:`ValueError`
@@ -784,6 +784,12 @@ Text I/O
.. versionchanged:: 3.3
The *write_through* argument has been added.
+ .. versionchanged:: 3.3
+ The default *encoding* is now ``locale.getpreferredencoding(False)``
+ instead of ``locale.getpreferredencoding()``. The locale encoding is no
+ longer changed temporarily using :func:`locale.setlocale`; the current
+ locale encoding is used instead of the user preferred encoding.
+
:class:`TextIOWrapper` provides one attribute in addition to those of
:class:`TextIOBase` and its parents:
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index f66290f..b684a9f 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1448,7 +1448,7 @@ class TextIOWrapper(TextIOBase):
r"""Character and line based layer over a BufferedIOBase object, buffer.
encoding gives the name of the encoding that the stream will be
- decoded or encoded with. It defaults to locale.getpreferredencoding.
+ decoded or encoded with. It defaults to locale.getpreferredencoding(False).
errors determines the strictness of encoding and decoding (see the
codecs.register) and defaults to "strict".
@@ -1487,7 +1487,7 @@ class TextIOWrapper(TextIOBase):
# Importing locale may fail if Python is being built
encoding = "ascii"
else:
- encoding = locale.getpreferredencoding()
+ encoding = locale.getpreferredencoding(False)
if not isinstance(encoding, str):
raise ValueError("invalid encoding: %r" % encoding)
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index dfe64bf..d0d17c7 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -1,20 +1,21 @@
# Python test set -- built-in functions
-import platform
-import unittest
-import sys
-import warnings
+import ast
+import builtins
import collections
import io
+import locale
import os
-import ast
-import types
-import builtins
+import pickle
+import platform
import random
+import sys
import traceback
-from test.support import TESTFN, unlink, run_unittest, check_warnings
+import types
+import unittest
+import warnings
from operator import neg
-import pickle
+from test.support import TESTFN, unlink, run_unittest, check_warnings
try:
import pty, signal
except ImportError:
@@ -961,6 +962,27 @@ class BuiltinTest(unittest.TestCase):
fp.close()
unlink(TESTFN)
+ def test_open_default_encoding(self):
+ old_environ = dict(os.environ)
+ try:
+ # try to get a user preferred encoding different than the current
+ # locale encoding to check that open() uses the current locale
+ # encoding and not the user preferred encoding
+ for key in ('LC_ALL', 'LANG', 'LC_CTYPE'):
+ if key in os.environ:
+ del os.environ[key]
+
+ self.write_testfile()
+ current_locale_encoding = locale.getpreferredencoding(False)
+ fp = open(TESTFN, 'w')
+ try:
+ self.assertEqual(fp.encoding, current_locale_encoding)
+ finally:
+ fp.close()
+ finally:
+ os.environ.clear()
+ os.environ.update(old_environ)
+
def test_ord(self):
self.assertEqual(ord(' '), 32)
self.assertEqual(ord('A'), 65)
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index f5bb732..1951a06 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -19,20 +19,21 @@
# test both implementations. This file has lots of examples.
################################################################################
+import abc
+import array
+import errno
+import locale
import os
+import pickle
+import random
+import signal
import sys
import time
-import array
-import random
import unittest
-import weakref
-import abc
-import signal
-import errno
import warnings
-import pickle
-from itertools import cycle, count
+import weakref
from collections import deque
+from itertools import cycle, count
from test import support
import codecs
@@ -1881,6 +1882,24 @@ class TextIOWrapperTest(unittest.TestCase):
t.write("A\rB")
self.assertEqual(r.getvalue(), b"XY\nZA\rB")
+ def test_default_encoding(self):
+ old_environ = dict(os.environ)
+ try:
+ # try to get a user preferred encoding different than the current
+ # locale encoding to check that TextIOWrapper() uses the current
+ # locale encoding and not the user preferred encoding
+ for key in ('LC_ALL', 'LANG', 'LC_CTYPE'):
+ if key in os.environ:
+ del os.environ[key]
+
+ current_locale_encoding = locale.getpreferredencoding(False)
+ b = self.BytesIO()
+ t = self.TextIOWrapper(b)
+ self.assertEqual(t.encoding, current_locale_encoding)
+ finally:
+ os.environ.clear()
+ os.environ.update(old_environ)
+
def test_encoding(self):
# Check the encoding attribute is always set, and valid
b = self.BytesIO()
diff --git a/Misc/NEWS b/Misc/NEWS
index b8d0928..332e408 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,12 @@ What's New in Python 3.3.0 Beta 1?
Core and Builtins
-----------------
+- Issue #11022: open() and io.TextIOWrapper are now calling
+ locale.getpreferredencoding(False) instead of locale.getpreferredencoding()
+ in text mode if the encoding is not specified. The locale encoding is no
+ longer changed temporarily using locale.setlocale(); the current locale
+ encoding is used instead of the user preferred encoding.
+
- Issue #14673: Add Eric Snow's sys.implementation implementation.
Library
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 31eea3c..61b9f52 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -112,8 +112,9 @@ PyDoc_STRVAR(open_doc,
"'a' for appending (which on some Unix systems, means that all writes\n"
"append to the end of the file regardless of the current seek position).\n"
"In text mode, if encoding is not specified the encoding used is platform\n"
-"dependent. (For reading and writing raw bytes use binary mode and leave\n"
-"encoding unspecified.) The available modes are:\n"
+"dependent: locale.getpreferredencoding(False) is called to get the\n"
+"current locale encoding. (For reading and writing raw bytes use binary\n"
+"mode and leave encoding unspecified.) The available modes are:\n"
"\n"
"========= ===============================================================\n"
"Character Meaning\n"
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index ae105e5..287f165 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -630,7 +630,7 @@ PyDoc_STRVAR(textiowrapper_doc,
"Character and line based layer over a BufferedIOBase object, buffer.\n"
"\n"
"encoding gives the name of the encoding that the stream will be\n"
- "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
+ "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
"\n"
"errors determines the strictness of encoding and decoding (see the\n"
"codecs.register) and defaults to \"strict\".\n"
@@ -898,7 +898,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
else {
use_locale:
self->encoding = _PyObject_CallMethodId(
- state->locale_module, &PyId_getpreferredencoding, NULL);
+ state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
if (self->encoding == NULL) {
catch_ImportError:
/*