From b275210a3b0e04691ebd1c1d0374720be59911b9 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Wed, 27 Apr 2016 23:13:46 +0300
Subject: Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
 for passing to open. Original patch by Joseph Hackman.

---
 Doc/library/fileinput.rst  | 10 +++++++---
 Doc/whatsnew/3.6.rst       |  7 +++++++
 Lib/fileinput.py           |  4 ++--
 Lib/test/test_fileinput.py | 21 ++++++++++++++++++++-
 Misc/ACKS                  |  1 +
 Misc/NEWS                  |  3 +++
 6 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst
index 3433682..8efe8e3 100644
--- a/Doc/library/fileinput.rst
+++ b/Doc/library/fileinput.rst
@@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
    Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
 
 
-.. function:: hook_encoded(encoding)
+.. function:: hook_encoded(encoding, errors=None)
 
    Returns a hook which opens each file with :func:`open`, using the given
-   *encoding* to read the file.
+   *encoding* and *errors* to read the file.
 
    Usage example: ``fi =
-   fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
+   fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
+   "surrogateescape"))``
+
+   .. versionchanged:: 3.6
+      Added the optional *errors* parameter.
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 99223af..be4c014 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
 (Contributed by Aviv Palivoda in :issue:`26243`.)
 
 
+fileinput
+---------
+
+:func:`~fileinput.hook_encoded` now supports the *errors* argument.
+(Contributed by Joseph Hackman in :issue:`25788`.)
+
+
 Optimizations
 =============
 
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
index 1e19d24..721fe9c 100644
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
         return open(filename, mode)
 
 
-def hook_encoded(encoding):
+def hook_encoded(encoding, errors=None):
     def openhook(filename, mode):
-        return open(filename, mode, encoding=encoding)
+        return open(filename, mode, encoding=encoding, errors=errors)
     return openhook
 
 
diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py
index 4f67c25..565633f 100644
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
 
     def test(self):
         encoding = object()
-        result = fileinput.hook_encoded(encoding)
+        errors = object()
+        result = fileinput.hook_encoded(encoding, errors=errors)
 
         fake_open = InvocationRecorder()
         original_open = builtins.open
@@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
         self.assertIs(args[0], filename)
         self.assertIs(args[1], mode)
         self.assertIs(kwargs.pop('encoding'), encoding)
+        self.assertIs(kwargs.pop('errors'), errors)
         self.assertFalse(kwargs)
 
+    def test_errors(self):
+        with open(TESTFN, 'wb') as f:
+            f.write(b'\x80abc')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        def check(errors, expected_lines):
+            with FileInput(files=TESTFN, mode='r',
+                           openhook=hook_encoded('utf-8', errors=errors)) as fi:
+                lines = list(fi)
+            self.assertEqual(lines, expected_lines)
+
+        check('ignore', ['abc'])
+        with self.assertRaises(UnicodeDecodeError):
+            check('strict', ['abc'])
+        check('replace', ['\ufffdabc'])
+        check('backslashreplace', ['\\x80abc'])
+
     def test_modes(self):
         with open(TESTFN, 'wb') as f:
             # UTF-7 is a convenient, seldom used encoding
diff --git a/Misc/ACKS b/Misc/ACKS
index dd3a567..ebc3fc6 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -538,6 +538,7 @@ Michael Guravage
 Lars Gustäbel
 Thomas Güttler
 Jonas H.
+Joseph Hackman
 Barry Haddow
 Philipp Hagemeister
 Paul ten Hagen
diff --git a/Misc/NEWS b/Misc/NEWS
index b6fb8f8..e68bbdf 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -256,6 +256,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
+  for passing to open. Original patch by Joseph Hackman.
+
 - Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by
   Xiang Zhang.
 
-- 
cgit v0.12