diff options
author | Cody Maloney <cmaloney@users.noreply.github.com> | 2024-07-04 07:17:00 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-04 07:17:00 (GMT) |
commit | 2f5f19e783385ec5312f7054827ccf1cdb6e14ef (patch) | |
tree | cd4c0b5b1f9b4b6dd0d96ed995fb972838de5eae /Lib/_pyio.py | |
parent | 9728ead36181fb3f0a4b2e8a7291a3e0a702b952 (diff) | |
download | cpython-2f5f19e783385ec5312f7054827ccf1cdb6e14ef.zip cpython-2f5f19e783385ec5312f7054827ccf1cdb6e14ef.tar.gz cpython-2f5f19e783385ec5312f7054827ccf1cdb6e14ef.tar.bz2 |
gh-120754: Reduce system calls in full-file FileIO.readall() case (#120755)
This reduces the system call count of a simple program[0] that reads all
the `.rst` files in Doc by over 10% (5706 -> 4734 system calls on my
linux system, 5813 -> 4875 on my macOS)
This reduces the number of `fstat()` calls always and seek calls most
the time. Stat was always called twice, once at open (to error early on
directories), and a second time to get the size of the file to be able
to read the whole file in one read. Now the size is cached with the
first call.
The code keeps an optimization that if the user had previously read a
lot of data, the current position is subtracted from the number of bytes
to read. That is somewhat expensive so only do it on larger files,
otherwise just try and read the extra bytes and resize the PyBytes as
needeed.
I built a little test program to validate the behavior + assumptions
around relative costs and then ran it under `strace` to get a log of the
system calls. Full samples below[1].
After the changes, this is everything in one `filename.read_text()`:
```python3
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3`
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0`
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```
This does make some tradeoffs
1. If the file size changes between open() and readall(), this will
still get all the data but might have more read calls.
2. I experimented with avoiding the stat + cached result for small files
in general, but on my dev workstation at least that tended to reduce
performance compared to using the fstat().
[0]
```python3
from pathlib import Path
nlines = []
for filename in Path("cpython/Doc").glob("**/*.rst"):
nlines.append(len(filename.read_text()))
```
[1]
Before small file:
```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```
After small file:
```
openat(AT_FDCWD, "cpython/Doc/howto/clinic.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=343, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
read(3, ":orphan:\n\n.. This page is retain"..., 344) = 343
read(3, "", 1) = 0
close(3) = 0
```
Before large file:
```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffe52525930) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1) = 0
close(3) = 0
```
After large file:
```
openat(AT_FDCWD, "cpython/Doc/c-api/typeobj.rst", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=133104, ...}) = 0
ioctl(3, TCGETS, 0x7ffdfac04b40) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
lseek(3, 0, SEEK_CUR) = 0
read(3, ".. highlight:: c\n\n.. _type-struc"..., 133105) = 133104
read(3, "", 1) = 0
close(3) = 0
```
Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
Diffstat (limited to 'Lib/_pyio.py')
-rw-r--r-- | Lib/_pyio.py | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 7d298e1..75b5ad1 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1577,6 +1577,7 @@ class FileIO(RawIOBase): self._blksize = getattr(fdfstat, 'st_blksize', 0) if self._blksize <= 1: self._blksize = DEFAULT_BUFFER_SIZE + self._estimated_size = fdfstat.st_size if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1654,14 +1655,18 @@ class FileIO(RawIOBase): """ self._checkClosed() self._checkReadable() - bufsize = DEFAULT_BUFFER_SIZE - try: - pos = os.lseek(self._fd, 0, SEEK_CUR) - end = os.fstat(self._fd).st_size - if end >= pos: - bufsize = end - pos + 1 - except OSError: - pass + if self._estimated_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + bufsize = self._estimated_size + 1 + + if self._estimated_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._estimated_size >= pos: + bufsize = self._estimated_size - pos + 1 + except OSError: + pass result = bytearray() while True: @@ -1737,6 +1742,7 @@ class FileIO(RawIOBase): if size is None: size = self.tell() os.ftruncate(self._fd, size) + self._estimated_size = size return size def close(self): |