diff options
author | Inada Naoki <songofacandy@gmail.com> | 2021-04-14 05:12:58 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-14 05:12:58 (GMT) |
commit | 333d10cbb53dd5f28d76f659a49bf0735f8509d8 (patch) | |
tree | ddc1e42d033ce82d4f8b29fd281eb6754ea85d2b | |
parent | 133705b85cc25d1e6684d32f8943ca288fadfda0 (diff) | |
download | cpython-333d10cbb53dd5f28d76f659a49bf0735f8509d8.zip cpython-333d10cbb53dd5f28d76f659a49bf0735f8509d8.tar.gz cpython-333d10cbb53dd5f28d76f659a49bf0735f8509d8.tar.bz2 |
bpo-43712 : fileinput: Add encoding parameter (GH-25272)
-rw-r--r-- | Doc/library/fileinput.rst | 35 | ||||
-rw-r--r-- | Doc/whatsnew/3.10.rst | 11 | ||||
-rw-r--r-- | Lib/fileinput.py | 58 | ||||
-rw-r--r-- | Lib/test/test_fileinput.py | 50 | ||||
-rw-r--r-- | Misc/ACKS | 1 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2021-04-08-12-25-08.bpo-43712.f8WXCX.rst | 2 |
6 files changed, 119 insertions, 38 deletions
diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst index cc4039a..8196400 100644 --- a/Doc/library/fileinput.rst +++ b/Doc/library/fileinput.rst @@ -18,7 +18,7 @@ write one file see :func:`open`. The typical use is:: import fileinput - for line in fileinput.input(): + for line in fileinput.input(encoding="utf-8"): process(line) This iterates over the lines of all files listed in ``sys.argv[1:]``, defaulting @@ -49,13 +49,14 @@ a file may not have one. You can control how files are opened by providing an opening hook via the *openhook* parameter to :func:`fileinput.input` or :class:`FileInput()`. The hook must be a function that takes two arguments, *filename* and *mode*, and -returns an accordingly opened file-like object. Two useful hooks are already -provided by this module. +returns an accordingly opened file-like object. If *encoding* and/or *errors* +are specified, they will be passed to the hook as additional keyword arguments. +This module provides a :func:`hook_compressed` to support compressed files. The following function is the primary interface of this module: -.. function:: input(files=None, inplace=False, backup='', *, mode='r', openhook=None) +.. function:: input(files=None, inplace=False, backup='', *, mode='r', openhook=None, encoding=None, errors=None) Create an instance of the :class:`FileInput` class. The instance will be used as global state for the functions of this module, and is also returned to use @@ -66,7 +67,7 @@ The following function is the primary interface of this module: :keyword:`with` statement. In this example, *input* is closed after the :keyword:`!with` statement is exited, even if an exception occurs:: - with fileinput.input(files=('spam.txt', 'eggs.txt')) as f: + with fileinput.input(files=('spam.txt', 'eggs.txt'), encoding="utf-8") as f: for line in f: process(line) @@ -76,6 +77,9 @@ The following function is the primary interface of this module: .. 
versionchanged:: 3.8 The keyword parameters *mode* and *openhook* are now keyword-only. + .. versionchanged:: 3.10 + The keyword-only parameters *encoding* and *errors* are added. + The following functions use the global state created by :func:`fileinput.input`; if there is no active state, :exc:`RuntimeError` is raised. @@ -137,7 +141,7 @@ The class which implements the sequence behavior provided by the module is available for subclassing as well: -.. class:: FileInput(files=None, inplace=False, backup='', *, mode='r', openhook=None) +.. class:: FileInput(files=None, inplace=False, backup='', *, mode='r', openhook=None, encoding=None, errors=None) Class :class:`FileInput` is the implementation; its methods :meth:`filename`, :meth:`fileno`, :meth:`lineno`, :meth:`filelineno`, :meth:`isfirstline`, @@ -155,6 +159,8 @@ available for subclassing as well: *filename* and *mode*, and returns an accordingly opened file-like object. You cannot use *inplace* and *openhook* together. + You can specify *encoding* and *errors*, which are passed to :func:`open` or *openhook*. + A :class:`FileInput` instance can be used as a context manager in the :keyword:`with` statement. In this example, *input* is closed after the :keyword:`!with` statement is exited, even if an exception occurs:: @@ -162,7 +168,6 @@ available for subclassing as well: with FileInput(files=('spam.txt', 'eggs.txt')) as input: process(input) - .. versionchanged:: 3.2 Can be used as a context manager. @@ -175,6 +180,8 @@ available for subclassing as well: .. versionchanged:: 3.8 The keyword parameter *mode* and *openhook* are now keyword-only. + .. versionchanged:: 3.10 + The keyword-only parameters *encoding* and *errors* are added. **Optional in-place filtering:** if the keyword argument ``inplace=True`` is @@ -191,14 +198,20 @@ when standard input is read. The two following opening hooks are provided by this module: -.. 
function:: hook_compressed(filename, mode, *, encoding=None, errors=None) Transparently opens files compressed with gzip and bzip2 (recognized by the extensions ``'.gz'`` and ``'.bz2'``) using the :mod:`gzip` and :mod:`bz2` modules. If the filename extension is not ``'.gz'`` or ``'.bz2'``, the file is opened normally (ie, using :func:`open` without any decompression). - Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` + The *encoding* and *errors* values are passed to :class:`io.TextIOWrapper` + for compressed files and :func:`open` for normal files. + + Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed, encoding="utf-8")`` + + .. versionchanged:: 3.10 + The keyword-only parameters *encoding* and *errors* are added. .. function:: hook_encoded(encoding, errors=None) @@ -212,3 +225,7 @@ The two following opening hooks are provided by this module: .. versionchanged:: 3.6 Added the optional *errors* parameter. + + .. deprecated:: 3.10 + This function is deprecated since :func:`input` and :class:`FileInput` + now have *encoding* and *errors* parameters. diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index b6e954c..21f9128 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -760,6 +760,17 @@ enum module constants have a :func:`repr` of ``module_name.member_name``. (Contributed by Ethan Furman in :issue:`40066`.) +fileinput +--------- + +Added *encoding* and *errors* parameters in :func:`fileinput.input` and +:class:`fileinput.FileInput`. +(Contributed by Inada Naoki in :issue:`43712`.) + +:func:`fileinput.hook_compressed` now returns a :class:`TextIOWrapper` object +when *mode* is "r" and the file is compressed, like uncompressed files. +(Contributed by Inada Naoki in :issue:`5758`.) 
+ gc -- diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 0c31f93..6218c4f 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -3,7 +3,7 @@ Typical use is: import fileinput - for line in fileinput.input(): + for line in fileinput.input(encoding="utf-8"): process(line) This iterates over the lines of all files listed in sys.argv[1:], @@ -63,15 +63,9 @@ file remains around; by default, the extension is ".bak" and it is deleted when the output file is closed. In-place filtering is disabled when standard input is read. XXX The current implementation does not work for MS-DOS 8+3 filesystems. - -XXX Possible additions: - -- optional getopt argument processing -- isatty() -- read(), read(size), even readlines() - """ +import io import sys, os from types import GenericAlias @@ -81,7 +75,8 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno", _state = None -def input(files=None, inplace=False, backup="", *, mode="r", openhook=None): +def input(files=None, inplace=False, backup="", *, mode="r", openhook=None, + encoding=None, errors=None): """Return an instance of the FileInput class, which can be iterated. The parameters are passed to the constructor of the FileInput class. 
@@ -91,7 +86,8 @@ def input(files=None, inplace=False, backup="", *, mode="r", openhook=None): global _state if _state and _state._file: raise RuntimeError("input() already active") - _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook) + _state = FileInput(files, inplace, backup, mode=mode, openhook=openhook, + encoding=encoding, errors=errors) return _state def close(): @@ -186,7 +182,7 @@ class FileInput: """ def __init__(self, files=None, inplace=False, backup="", *, - mode="r", openhook=None): + mode="r", openhook=None, encoding=None, errors=None): if isinstance(files, str): files = (files,) elif isinstance(files, os.PathLike): @@ -209,6 +205,16 @@ class FileInput: self._file = None self._isstdin = False self._backupfilename = None + self._encoding = encoding + self._errors = errors + + # We can not use io.text_encoding() here because old openhook doesn't + # take encoding parameter. + if "b" not in mode and encoding is None and sys.flags.warn_default_encoding: + import warnings + warnings.warn("'encoding' argument not specified.", + EncodingWarning, 2) + # restrict mode argument to reading modes if mode not in ('r', 'rU', 'U', 'rb'): raise ValueError("FileInput opening mode must be one of " @@ -362,9 +368,20 @@ class FileInput: else: # This may raise OSError if self._openhook: - self._file = self._openhook(self._filename, self._mode) + # Custom hooks made previous to Python 3.10 didn't have + # encoding argument + if self._encoding is None: + self._file = self._openhook(self._filename, self._mode) + else: + self._file = self._openhook( + self._filename, self._mode, encoding=self._encoding, errors=self._errors) else: - self._file = open(self._filename, self._mode) + # EncodingWarning is emitted in __init__() already + if "b" not in self._mode: + encoding = self._encoding or "locale" + else: + encoding = None + self._file = open(self._filename, self._mode, encoding=encoding, errors=self._errors) self._readline = self._file.readline # hide 
FileInput._readline return self._readline() @@ -395,16 +412,23 @@ class FileInput: __class_getitem__ = classmethod(GenericAlias) -def hook_compressed(filename, mode): +def hook_compressed(filename, mode, *, encoding=None, errors=None): + if encoding is None: # EncodingWarning is emitted in FileInput() already. + encoding = "locale" ext = os.path.splitext(filename)[1] if ext == '.gz': import gzip - return gzip.open(filename, mode) + stream = gzip.open(filename, mode) elif ext == '.bz2': import bz2 - return bz2.BZ2File(filename, mode) + stream = bz2.BZ2File(filename, mode) else: - return open(filename, mode) + return open(filename, mode, encoding=encoding, errors=errors) + + # gzip and bz2 are binary mode by default. + if "b" not in mode: + stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors) + return stream def hook_encoded(encoding, errors=None): diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index d5edf74..d01d3962 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -2,6 +2,7 @@ Tests for fileinput module. 
Nick Mathewson ''' +import io import os import sys import re @@ -238,7 +239,7 @@ class FileInputTests(BaseTests, unittest.TestCase): # try opening in universal newline mode t1 = self.writeTmp(b"A\nB\r\nC\rD", mode="wb") with warnings_helper.check_warnings(('', DeprecationWarning)): - fi = FileInput(files=t1, mode="U") + fi = FileInput(files=t1, mode="U", encoding="utf-8") with warnings_helper.check_warnings(('', DeprecationWarning)): lines = list(fi) self.assertEqual(lines, ["A\n", "B\n", "C\n", "D"]) @@ -278,7 +279,7 @@ class FileInputTests(BaseTests, unittest.TestCase): class CustomOpenHook: def __init__(self): self.invoked = False - def __call__(self, *args): + def __call__(self, *args, **kargs): self.invoked = True return open(*args) @@ -334,6 +335,14 @@ class FileInputTests(BaseTests, unittest.TestCase): with open(temp_file, 'rb') as f: self.assertEqual(f.read(), b'New line.') + def test_file_hook_backward_compatibility(self): + def old_hook(filename, mode): + return io.StringIO("I used to receive only filename and mode") + t = self.writeTmp("\n") + with FileInput([t], openhook=old_hook) as fi: + result = fi.readline() + self.assertEqual(result, "I used to receive only filename and mode") + def test_context_manager(self): t1 = self.writeTmp("A\nB\nC") t2 = self.writeTmp("D\nE\nF") @@ -529,12 +538,14 @@ class MockFileInput: """A class that mocks out fileinput.FileInput for use during unit tests""" def __init__(self, files=None, inplace=False, backup="", *, - mode="r", openhook=None): + mode="r", openhook=None, encoding=None, errors=None): self.files = files self.inplace = inplace self.backup = backup self.mode = mode self.openhook = openhook + self.encoding = encoding + self.errors = errors self._file = None self.invocation_counts = collections.defaultdict(lambda: 0) self.return_values = {} @@ -637,10 +648,11 @@ class Test_fileinput_input(BaseFileInputGlobalMethodsTest): backup = object() mode = object() openhook = object() + encoding = object() # call 
fileinput.input() with different values for each argument result = fileinput.input(files=files, inplace=inplace, backup=backup, - mode=mode, openhook=openhook) + mode=mode, openhook=openhook, encoding=encoding) # ensure fileinput._state was set to the returned object self.assertIs(result, fileinput._state, "fileinput._state") @@ -863,11 +875,15 @@ class Test_fileinput_isstdin(BaseFileInputGlobalMethodsTest): self.assertIs(fileinput._state, instance) class InvocationRecorder: + def __init__(self): self.invocation_count = 0 + def __call__(self, *args, **kwargs): self.invocation_count += 1 self.last_invocation = (args, kwargs) + return io.BytesIO(b'some bytes') + class Test_hook_compressed(unittest.TestCase): """Unit tests for fileinput.hook_compressed()""" @@ -886,33 +902,43 @@ class Test_hook_compressed(unittest.TestCase): original_open = gzip.open gzip.open = self.fake_open try: - result = fileinput.hook_compressed("test.gz", 3) + result = fileinput.hook_compressed("test.gz", "3") finally: gzip.open = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.gz", 3), {})) + self.assertEqual(self.fake_open.last_invocation, (("test.gz", "3"), {})) + + @unittest.skipUnless(gzip, "Requires gzip and zlib") + def test_gz_with_encoding_fake(self): + original_open = gzip.open + gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string') + try: + result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8") + finally: + gzip.open = original_open + self.assertEqual(list(result), ['Ex-binary string']) @unittest.skipUnless(bz2, "Requires bz2") def test_bz2_ext_fake(self): original_open = bz2.BZ2File bz2.BZ2File = self.fake_open try: - result = fileinput.hook_compressed("test.bz2", 4) + result = fileinput.hook_compressed("test.bz2", "4") finally: bz2.BZ2File = original_open self.assertEqual(self.fake_open.invocation_count, 1) - self.assertEqual(self.fake_open.last_invocation, (("test.bz2", 4), 
{})) + self.assertEqual(self.fake_open.last_invocation, (("test.bz2", "4"), {})) def test_blah_ext(self): - self.do_test_use_builtin_open("abcd.blah", 5) + self.do_test_use_builtin_open("abcd.blah", "5") def test_gz_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Gz", 6) + self.do_test_use_builtin_open("abcd.Gz", "6") def test_bz2_ext_builtin(self): - self.do_test_use_builtin_open("abcd.Bz2", 7) + self.do_test_use_builtin_open("abcd.Bz2", "7") def do_test_use_builtin_open(self, filename, mode): original_open = self.replace_builtin_open(self.fake_open) @@ -923,7 +949,7 @@ class Test_hook_compressed(unittest.TestCase): self.assertEqual(self.fake_open.invocation_count, 1) self.assertEqual(self.fake_open.last_invocation, - ((filename, mode), {})) + ((filename, mode), {'encoding': 'locale', 'errors': None})) @staticmethod def replace_builtin_open(new_open_func): @@ -33,6 +33,7 @@ Nir Aides Akira Yaniv Aknin Jyrki Alakuijala +Tatiana Al-Chueyr Steve Alexander Fred Allen Jeff Allen diff --git a/Misc/NEWS.d/next/Library/2021-04-08-12-25-08.bpo-43712.f8WXCX.rst b/Misc/NEWS.d/next/Library/2021-04-08-12-25-08.bpo-43712.f8WXCX.rst new file mode 100644 index 0000000..d11df0d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-08-12-25-08.bpo-43712.f8WXCX.rst @@ -0,0 +1,2 @@ +Add ``encoding`` and ``errors`` parameters to :func:`fileinput.input` and +:class:`fileinput.FileInput`. |