diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pyio.py | 47 | ||||
-rw-r--r-- | Lib/bz2.py | 1 | ||||
-rw-r--r-- | Lib/configparser.py | 1 | ||||
-rw-r--r-- | Lib/gzip.py | 1 | ||||
-rw-r--r-- | Lib/io.py | 2 | ||||
-rw-r--r-- | Lib/lzma.py | 1 | ||||
-rw-r--r-- | Lib/pathlib.py | 4 | ||||
-rw-r--r-- | Lib/site.py | 4 | ||||
-rw-r--r-- | Lib/subprocess.py | 9 | ||||
-rw-r--r-- | Lib/tempfile.py | 7 | ||||
-rw-r--r-- | Lib/test/exception_hierarchy.txt | 1 | ||||
-rw-r--r-- | Lib/test/test_embed.py | 1 | ||||
-rw-r--r-- | Lib/test/test_io.py | 23 | ||||
-rw-r--r-- | Lib/test/test_pickle.py | 3 | ||||
-rw-r--r-- | Lib/test/test_sys.py | 3 |
15 files changed, 93 insertions, 15 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 4804ed2..0f182d4 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -40,6 +40,29 @@ _IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mo _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +def text_encoding(encoding, stacklevel=2): + """ + A helper function to choose the text encoding. + + When encoding is not None, just return it. + Otherwise, return the default text encoding (i.e. "locale"). + + This function emits an EncodingWarning if *encoding* is None and + sys.flags.warn_default_encoding is true. + + This can be used in APIs with an encoding=None parameter + that pass it to TextIOWrapper or open. + However, please consider using encoding="utf-8" for new APIs. + """ + if encoding is None: + encoding = "locale" + if sys.flags.warn_default_encoding: + import warnings + warnings.warn("'encoding' argument not specified.", + EncodingWarning, stacklevel + 1) + return encoding + + def open(file, mode="r", buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None): @@ -248,6 +271,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = buffer if binary: return result + encoding = text_encoding(encoding) text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) result = text text.mode = mode @@ -2004,19 +2028,22 @@ class TextIOWrapper(TextIOBase): def __init__(self, buffer, encoding=None, errors=None, newline=None, line_buffering=False, write_through=False): self._check_newline(newline) - if encoding is None: + encoding = text_encoding(encoding) + + if encoding == "locale": try: - encoding = os.device_encoding(buffer.fileno()) + encoding = os.device_encoding(buffer.fileno()) or "locale" except (AttributeError, UnsupportedOperation): pass - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding(False) + + if encoding == "locale": + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + encoding = "utf-8" + else: + encoding = locale.getpreferredencoding(False) if not isinstance(encoding, str): raise ValueError("invalid encoding: %r" % encoding) @@ -311,6 +311,7 @@ def open(filename, mode="rb", compresslevel=9, binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file diff --git a/Lib/configparser.py b/Lib/configparser.py index 924cc56..3b4cb5e 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -690,6 +690,7 @@ class RawConfigParser(MutableMapping): """ if isinstance(filenames, (str, bytes, os.PathLike)): filenames = [filenames] + encoding = io.text_encoding(encoding) read_ok = [] for filename in filenames: try: diff --git a/Lib/gzip.py b/Lib/gzip.py index 1369157..0a8993b 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -62,6 +62,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, raise TypeError("filename must be a str or bytes object, or a file") if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file @@ -54,7 +54,7 @@ import abc from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, open, open_code, FileIO, BytesIO, StringIO, BufferedReader, BufferedWriter, BufferedRWPair, BufferedRandom, - IncrementalNewlineDecoder, TextIOWrapper) + IncrementalNewlineDecoder, text_encoding, TextIOWrapper) OpenWrapper = _io.open # for compatibility with _pyio diff --git a/Lib/lzma.py b/Lib/lzma.py index 0817b87..c8b1970 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -302,6 +302,7 @@ def open(filename, mode="rb", *, preset=preset, filters=filters) if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 531a699..5c9284b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1241,6 +1241,8 @@ class Path(PurePath): Open the file pointed by this path and return a file object, as the built-in open() function does. """ + if "b" not in mode: + encoding = io.text_encoding(encoding) return io.open(self, mode, buffering, encoding, errors, newline, opener=self._opener) @@ -1255,6 +1257,7 @@ class Path(PurePath): """ Open the file in text mode, read it, and close the file. """ + encoding = io.text_encoding(encoding) with self.open(mode='r', encoding=encoding, errors=errors) as f: return f.read() @@ -1274,6 +1277,7 @@ class Path(PurePath): if not isinstance(data, str): raise TypeError('data must be str, not %s' % data.__class__.__name__) + encoding = io.text_encoding(encoding) with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) diff --git a/Lib/site.py b/Lib/site.py index 5f1b31e..939893e 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -170,7 +170,9 @@ def addpackage(sitedir, name, known_paths): fullname = os.path.join(sitedir, name) _trace(f"Processing .pth file: {fullname!r}") try: - f = io.TextIOWrapper(io.open_code(fullname)) + # locale encoding is not ideal especially on Windows. But we have used + # it for a long time. setuptools uses the locale encoding too. + f = io.TextIOWrapper(io.open_code(fullname), encoding="locale") except OSError: return with f: diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 4b011e4..2b78549 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -693,7 +693,7 @@ def _use_posix_spawn(): _USE_POSIX_SPAWN = _use_posix_spawn() -class Popen(object): +class Popen: """ Execute a child program in a new process. For a complete description of the arguments see the Python documentation. @@ -844,6 +844,13 @@ class Popen(object): self.text_mode = encoding or errors or text or universal_newlines + # PEP 597: We suppress the EncodingWarning in subprocess module + # for now (at Python 3.10), because we focus on files for now. + # This will be changed to encoding = io.text_encoding(encoding) + # in the future. + if self.text_mode and encoding is None: + self.encoding = encoding = "locale" + # How long to resume waiting on a child after the first ^C. # There is no right value for this. The purpose is to be polite # yet remain good for interactive users trying to exit a tool. diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 4b2547c..efcf7a7 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -543,6 +543,9 @@ def NamedTemporaryFile(mode='w+b', buffering=-1, encoding=None, if _os.name == 'nt' and delete: flags |= _os.O_TEMPORARY + if "b" not in mode: + encoding = _io.text_encoding(encoding) + (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type) try: file = _io.open(fd, mode, buffering=buffering, @@ -583,6 +586,9 @@ else: """ global _O_TMPFILE_WORKS + if "b" not in mode: + encoding = _io.text_encoding(encoding) + prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir) flags = _bin_openflags @@ -638,6 +644,7 @@ class SpooledTemporaryFile: if 'b' in mode: self._file = _io.BytesIO() else: + encoding = _io.text_encoding(encoding) self._file = _io.TextIOWrapper(_io.BytesIO(), encoding=encoding, errors=errors, newline=newline) diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index 763a6c8..6c5e821 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -61,4 +61,5 @@ BaseException +-- ImportWarning +-- UnicodeWarning +-- BytesWarning + +-- EncodingWarning +-- ResourceWarning diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 6833b25..646cd06 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -389,6 +389,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'site_import': 1, 'bytes_warning': 0, + 'warn_default_encoding': 0, 'inspect': 0, 'interactive': 0, 'optimization_level': 0, diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 3768b62..c731302 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4249,6 +4249,29 @@ class MiscIOTest(unittest.TestCase): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + def test_check_encoding_warning(self): + # PEP 597: Raise warning when encoding is not specified + # and sys.flags.warn_default_encoding is set. + mod = self.io.__name__ + filename = __file__ + code = textwrap.dedent(f'''\ + import sys + from {mod} import open, TextIOWrapper + import pathlib + + with open({filename!r}) as f: # line 5 + pass + + pathlib.Path({filename!r}).read_text() # line 8 + ''') + proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) + warnings = proc.err.splitlines() + self.assertEqual(len(warnings), 2) + self.assertTrue( + warnings[0].startswith(b"<string>:5: EncodingWarning: ")) + self.assertTrue( + warnings[1].startswith(b"<string>:8: EncodingWarning: ")) + class CMiscIOTest(MiscIOTest): io = io diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 1f5cb10..23c7bd2 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -483,7 +483,8 @@ class CompatPickleTests(unittest.TestCase): if exc in (BlockingIOError, ResourceWarning, StopAsyncIteration, - RecursionError): + RecursionError, + EncodingWarning): continue if exc is not OSError and issubclass(exc, OSError): self.assertEqual(reverse_mapping('builtins', name), diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index fca05e6..5b004c2 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -591,7 +591,8 @@ class SysModuleTest(unittest.TestCase): "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", - "hash_randomization", "isolated", "dev_mode", "utf8_mode") + "hash_randomization", "isolated", "dev_mode", "utf8_mode", + "warn_default_encoding") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int |