From a2670565d8f5c502388378aba1fe73023fd8c8d4 Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Sat, 20 Oct 2018 03:22:31 +0300 Subject: bpo-32236: open() emits RuntimeWarning if buffering=1 for binary mode (GH-4842) If buffering=1 is specified for open() in binary mode, it is silently treated as buffering=-1 (i.e., the default buffer size). Coupled with the fact that line buffering is always supported in Python 2, such behavior caused several issues (e.g., bpo-10344, bpo-21332). Warn that line buffering is not supported if open() is called with binary mode and buffering=1. --- Doc/library/codecs.rst | 6 ++-- Lib/_pyio.py | 5 +++ Lib/codecs.py | 5 +-- Lib/subprocess.py | 11 ++++++- Lib/test/support/__init__.py | 32 ++++++++++++++++--- Lib/test/test_cmd_line_script.py | 4 +-- Lib/test/test_file.py | 37 ++++++++++++++-------- Lib/test/test_io.py | 2 +- Lib/test/test_subprocess.py | 3 +- .../2018-09-11-23-50-40.bpo-32236.3RupnN.rst | 2 ++ Modules/_io/_iomodule.c | 9 ++++++ 11 files changed, 88 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index b9c04c2..b323ab5 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -174,7 +174,7 @@ recommended approach for working with encoded text files, this module provides additional utility functions and classes that allow the use of a wider range of codecs when working with binary files: -.. function:: open(filename, mode='r', encoding=None, errors='strict', buffering=1) +.. function:: open(filename, mode='r', encoding=None, errors='strict', buffering=-1) Open an encoded file using the given *mode* and return an instance of :class:`StreamReaderWriter`, providing transparent encoding/decoding. @@ -194,8 +194,8 @@ wider range of codecs when working with binary files: *errors* may be given to define the error handling. It defaults to ``'strict'`` which causes a :exc:`ValueError` to be raised in case an encoding error occurs. - *buffering* has the same meaning as for the built-in :func:`open` function. It - defaults to line buffered. + *buffering* has the same meaning as for the built-in :func:`open` function. + It defaults to -1 which means that the default buffer size will be used. .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict') diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 01ef5b7..b8975ff 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -198,6 +198,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, raise ValueError("binary mode doesn't take an errors argument") if binary and newline is not None: raise ValueError("binary mode doesn't take a newline argument") + if binary and buffering == 1: + import warnings + warnings.warn("line buffering (buffering=1) isn't supported in binary " + "mode, the default buffer size will be used", + RuntimeWarning, 2) raw = FileIO(file, (creating and "x" or "") + (reading and "r" or "") + diff --git a/Lib/codecs.py b/Lib/codecs.py index a70ed20..6b028ad 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -862,7 +862,7 @@ class StreamRecoder: ### Shortcuts -def open(filename, mode='r', encoding=None, errors='strict', buffering=1): +def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -883,7 +883,8 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=1): encoding error occurs. buffering has the same meaning as for the builtin open() API. - It defaults to line buffered. + It defaults to -1 which means that the default buffer size will + be used. The returned wrapped file object provides an extra attribute .encoding which allows querying the used encoding. This diff --git a/Lib/subprocess.py b/Lib/subprocess.py index c827113..5db6f0c 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -743,12 +743,21 @@ class Popen(object): self._closed_child_pipe_fds = False + if self.text_mode: + if bufsize == 1: + line_buffering = True + # Use the default buffer size for the underlying binary streams + # since they don't support line buffering. + bufsize = -1 + else: + line_buffering = False + try: if p2cwrite != -1: self.stdin = io.open(p2cwrite, 'wb', bufsize) if self.text_mode: self.stdin = io.TextIOWrapper(self.stdin, write_through=True, - line_buffering=(bufsize == 1), + line_buffering=line_buffering, encoding=encoding, errors=errors) if c2pread != -1: self.stdout = io.open(c2pread, 'rb', bufsize) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index ed0d46d..01e8935 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -107,7 +107,8 @@ __all__ = [ # threads "threading_setup", "threading_cleanup", "reap_threads", "start_threads", # miscellaneous - "check_warnings", "check_no_resource_warning", "EnvironmentVarGuard", + "check_warnings", "check_no_resource_warning", "check_no_warnings", + "EnvironmentVarGuard", "run_with_locale", "swap_item", "swap_attr", "Matcher", "set_memlimit", "SuppressCrashReport", "sortdict", "run_with_tz", "PGO", "missing_compiler_executable", "fd_count", @@ -1253,6 +1254,30 @@ def check_warnings(*filters, **kwargs): @contextlib.contextmanager +def check_no_warnings(testcase, message='', category=Warning, force_gc=False): + """Context manager to check that no warnings are emitted. + + This context manager enables a given warning within its scope + and checks that no warnings are emitted even with that warning + enabled. + + If force_gc is True, a garbage collection is attempted before checking + for warnings. This may help to catch warnings emitted when objects + are deleted, such as ResourceWarning. + + Other keyword arguments are passed to warnings.filterwarnings(). + """ + with warnings.catch_warnings(record=True) as warns: + warnings.filterwarnings('always', + message=message, + category=category) + yield + if force_gc: + gc_collect() + testcase.assertEqual(warns, []) + + +@contextlib.contextmanager def check_no_resource_warning(testcase): """Context manager to check that no ResourceWarning is emitted. @@ -1266,11 +1291,8 @@ def check_no_resource_warning(testcase): You must remove the object which may emit ResourceWarning before the end of the context manager. """ - with warnings.catch_warnings(record=True) as warns: - warnings.filterwarnings('always', category=ResourceWarning) + with check_no_warnings(testcase, category=ResourceWarning, force_gc=True): yield - gc_collect() - testcase.assertEqual(warns, []) class CleanImport(object): diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 5ec9bbb..bc812ea 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -169,10 +169,10 @@ class CmdLineTest(unittest.TestCase): @contextlib.contextmanager def interactive_python(self, separate_stderr=False): if separate_stderr: - p = spawn_python('-i', bufsize=1, stderr=subprocess.PIPE) + p = spawn_python('-i', stderr=subprocess.PIPE) stderr = p.stderr else: - p = spawn_python('-i', bufsize=1, stderr=subprocess.STDOUT) + p = spawn_python('-i', stderr=subprocess.STDOUT) stderr = p.stdout try: # Drain stderr until prompt diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index f58d1da..cd642e7 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -169,22 +169,33 @@ class OtherFileTests: f.close() self.fail("no error for invalid mode: %s" % bad_mode) + def _checkBufferSize(self, s): + try: + f = self.open(TESTFN, 'wb', s) + f.write(str(s).encode("ascii")) + f.close() + f.close() + f = self.open(TESTFN, 'rb', s) + d = int(f.read().decode("ascii")) + f.close() + f.close() + except OSError as msg: + self.fail('error setting buffer size %d: %s' % (s, str(msg))) + self.assertEqual(d, s) + def testSetBufferSize(self): # make sure that explicitly setting the buffer size doesn't cause # misbehaviour especially with repeated close() calls - for s in (-1, 0, 1, 512): - try: - f = self.open(TESTFN, 'wb', s) - f.write(str(s).encode("ascii")) - f.close() - f.close() - f = self.open(TESTFN, 'rb', s) - d = int(f.read().decode("ascii")) - f.close() - f.close() - except OSError as msg: - self.fail('error setting buffer size %d: %s' % (s, str(msg))) - self.assertEqual(d, s) + for s in (-1, 0, 512): + with support.check_no_warnings(self, + message='line buffering', + category=RuntimeWarning): + self._checkBufferSize(s) + + # test that attempts to use line buffering in binary mode cause + # a warning + with self.assertWarnsRegex(RuntimeWarning, 'line buffering'): + self._checkBufferSize(1) def testTruncateOnWindows(self): # SF bug diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index c68b2fe..abd5538 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -593,7 +593,7 @@ class IOTest(unittest.TestCase): self.large_file_ops(f) def test_with_open(self): - for bufsize in (0, 1, 100): + for bufsize in (0, 100): f = None with self.open(support.TESTFN, "wb", bufsize) as f: f.write(b"xxx") diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index c56e1b6..b0b6b06 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1136,7 +1136,8 @@ class ProcessTestCase(BaseTestCase): # line is not flushed in binary mode with bufsize=1. # we should get empty response line = b'line' + os.linesep.encode() # assume ascii-based locale - self._test_bufsize_equal_one(line, b'', universal_newlines=False) + with self.assertWarnsRegex(RuntimeWarning, 'line buffering'): + self._test_bufsize_equal_one(line, b'', universal_newlines=False) def test_leaking_fds_on_error(self): # see bug #5179: Popen leaks file descriptors to PIPEs if diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst b/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst new file mode 100644 index 0000000..6fc1387 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-09-11-23-50-40.bpo-32236.3RupnN.rst @@ -0,0 +1,2 @@ +Warn that line buffering is not supported if :func:`open` is called with +binary mode and ``buffering=1``. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index eedca01..965c484 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -363,6 +363,15 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, goto error; } + if (binary && buffering == 1) { + if (PyErr_WarnEx(PyExc_RuntimeWarning, + "line buffering (buffering=1) isn't supported in " + "binary mode, the default buffer size will be used", + 1) < 0) { + goto error; + } + } + /* Create the Raw file stream */ { PyObject *RawIO_class = (PyObject *)&PyFileIO_Type; -- cgit v0.12