From 35d8ac7cd7ed6cd3d84af721dce970da59bd5f68 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 16 Aug 2024 13:52:41 -0700 Subject: GH-120754: Disable buffering in Path.read_bytes (#122111) `Path.read_bytes()` is used to read a whole file. Buffering (BufferedIO) is designed to make small, possibly interleaved, reads and writes efficient, which doesn't add value in this case. On my Mac, running the benchmark: ```python import pyperf from pathlib import Path def read_all(all_paths): for p in all_paths: p.read_bytes() def read_file(path_obj): path_obj.read_bytes() all_rst = list(Path("Doc").glob("**/*.rst")) all_py = list(Path(".").glob("**/*.py")) assert all_rst, "Should have found rst files" assert all_py, "Should have found python source files" runner = pyperf.Runner() runner.bench_func("read_file_small", read_file, Path("Doc/howto/clinic.rst")) runner.bench_func("read_file_large", read_file, Path("Doc/c-api/typeobj.rst")) ``` before: ```python ..................... read_file_small: Mean +- std dev: 6.80 us +- 0.07 us ..................... read_file_large: Mean +- std dev: 10.8 us +- 0.2 us ``` after: ```python ..................... read_file_small: Mean +- std dev: 5.67 us +- 0.05 us ..................... read_file_large: Mean +- std dev: 9.77 us +- 0.52 us ``` --- Lib/pathlib/_abc.py | 2 +- Lib/test/test_pathlib/test_pathlib_abc.py | 2 +- Misc/NEWS.d/next/Library/2024-07-22-08-57-28.gh-issue-120754.Eo5puP.rst | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-22-08-57-28.gh-issue-120754.Eo5puP.rst diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 500846d..720756c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -585,7 +585,7 @@ class PathBase(PurePathBase): """ Open the file in bytes mode, read it, and close the file. 
""" - with self.open(mode='rb') as f: + with self.open(mode='rb', buffering=0) as f: return f.read() def read_text(self, encoding=None, errors=None, newline=None): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 629a1d4..f222fd5 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1464,7 +1464,7 @@ class DummyPath(PathBase): def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): - if buffering != -1: + if buffering != -1 and not (buffering == 0 and 'b' in mode): raise NotImplementedError path_obj = self.resolve() path = str(path_obj) diff --git a/Misc/NEWS.d/next/Library/2024-07-22-08-57-28.gh-issue-120754.Eo5puP.rst b/Misc/NEWS.d/next/Library/2024-07-22-08-57-28.gh-issue-120754.Eo5puP.rst new file mode 100644 index 0000000..daf1841 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-22-08-57-28.gh-issue-120754.Eo5puP.rst @@ -0,0 +1 @@ +``Pathlib.read_bytes`` no longer opens the file in Python's buffered I/O mode. This reduces overheads as the code reads a file in whole leading to a modest speedup. -- cgit v0.12