summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2016-04-27 20:13:46 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2016-04-27 20:13:46 (GMT)
commit: b275210a3b0e04691ebd1c1d0374720be59911b9 (patch)
tree: de432dfc32f21bd012351ae1f92758fbe6332d6b
parent: 258a5d4dcb0fbf787c5e7e78b30e200534753413 (diff)
download: cpython-b275210a3b0e04691ebd1c1d0374720be59911b9.zip
cpython-b275210a3b0e04691ebd1c1d0374720be59911b9.tar.gz
cpython-b275210a3b0e04691ebd1c1d0374720be59911b9.tar.bz2
Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
for passing to open. Original patch by Joseph Hackman.
-rw-r--r-- Doc/library/fileinput.rst 10
-rw-r--r-- Doc/whatsnew/3.6.rst 7
-rw-r--r-- Lib/fileinput.py 4
-rw-r--r-- Lib/test/test_fileinput.py 21
-rw-r--r-- Misc/ACKS 1
-rw-r--r-- Misc/NEWS 3
6 files changed, 40 insertions, 6 deletions
diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst
index 3433682..8efe8e3 100644
--- a/Doc/library/fileinput.rst
+++ b/Doc/library/fileinput.rst
@@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
-.. function:: hook_encoded(encoding)
+.. function:: hook_encoded(encoding, errors=None)
Returns a hook which opens each file with :func:`open`, using the given
- *encoding* to read the file.
+ *encoding* and *errors* to read the file.
Usage example: ``fi =
- fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
+ fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
+ "surrogateescape"))``
+
+ .. versionchanged:: 3.6
+ Added the optional *errors* parameter.
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 99223af..be4c014 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
(Contributed by Aviv Palivoda in :issue:`26243`.)
+fileinput
+---------
+
+:func:`~fileinput.hook_encoded` now supports the *errors* argument.
+(Contributed by Joseph Hackman in :issue:`25788`.)
+
+
Optimizations
=============
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
index 1e19d24..721fe9c 100644
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
return open(filename, mode)
-def hook_encoded(encoding):
+def hook_encoded(encoding, errors=None):
def openhook(filename, mode):
- return open(filename, mode, encoding=encoding)
+ return open(filename, mode, encoding=encoding, errors=errors)
return openhook
diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py
index 4f67c25..565633f 100644
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
def test(self):
encoding = object()
- result = fileinput.hook_encoded(encoding)
+ errors = object()
+ result = fileinput.hook_encoded(encoding, errors=errors)
fake_open = InvocationRecorder()
original_open = builtins.open
@@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
self.assertIs(args[0], filename)
self.assertIs(args[1], mode)
self.assertIs(kwargs.pop('encoding'), encoding)
+ self.assertIs(kwargs.pop('errors'), errors)
self.assertFalse(kwargs)
+ def test_errors(self):
+ with open(TESTFN, 'wb') as f:
+ f.write(b'\x80abc')
+ self.addCleanup(safe_unlink, TESTFN)
+
+ def check(errors, expected_lines):
+ with FileInput(files=TESTFN, mode='r',
+ openhook=hook_encoded('utf-8', errors=errors)) as fi:
+ lines = list(fi)
+ self.assertEqual(lines, expected_lines)
+
+ check('ignore', ['abc'])
+ with self.assertRaises(UnicodeDecodeError):
+ check('strict', ['abc'])
+ check('replace', ['\ufffdabc'])
+ check('backslashreplace', ['\\x80abc'])
+
def test_modes(self):
with open(TESTFN, 'wb') as f:
# UTF-7 is a convenient, seldom used encoding
diff --git a/Misc/ACKS b/Misc/ACKS
index dd3a567..ebc3fc6 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -538,6 +538,7 @@ Michael Guravage
Lars Gustäbel
Thomas Güttler
Jonas H.
+Joseph Hackman
Barry Haddow
Philipp Hagemeister
Paul ten Hagen
diff --git a/Misc/NEWS b/Misc/NEWS
index b6fb8f8..e68bbdf 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -256,6 +256,9 @@ Core and Builtins
Library
-------
+- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
+ for passing to open. Original patch by Joseph Hackman.
+
- Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by
Xiang Zhang.