diff options
author | 6t8k <58048945+6t8k@users.noreply.github.com> | 2024-02-17 11:16:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-17 11:16:06 (GMT) |
commit | 26800cf25a0970d46934fa9a881c0ef6881d642b (patch) | |
tree | 94787f4406c4ab79490877d58612437787fd2068 /Lib | |
parent | d5a30a1777f04523c7b151b894e999f5714d8e96 (diff) | |
download | cpython-26800cf25a0970d46934fa9a881c0ef6881d642b.zip cpython-26800cf25a0970d46934fa9a881c0ef6881d642b.tar.gz cpython-26800cf25a0970d46934fa9a881c0ef6881d642b.tar.bz2 |
gh-95782: Fix io.BufferedReader.tell() etc. being able to return offsets < 0 (GH-99709)
lseek() always returns 0 for character pseudo-devices like
`/dev/urandom` (for other non-regular files, e.g. `/dev/stdin`, it
always returns -1, to which CPython reacts by raising appropriate
exceptions). They are thus technically seekable despite not having seek
semantics.
When calling read() on e.g. an instance of `io.BufferedReader` that
wraps such a file, `BufferedReader` reads ahead, filling its buffer,
creating a discrepancy between the number of bytes read and the internal
`tell()` always returning 0, which previously resulted in e.g.
`BufferedReader.tell()` or `BufferedReader.seek()` being able to return
positions < 0 even though these are supposed to be always >= 0.
Invariably keep the return value non-negative by returning
max(former_return_value, 0) instead, and add some corresponding tests.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pyio.py | 3 | ||||
-rw-r--r-- | Lib/test/test_io.py | 47 |
2 files changed, 48 insertions, 2 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 8a0d0dc..a3fede6 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1197,7 +1197,8 @@ class BufferedReader(_BufferedIOMixin): return written def tell(self): - return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos + # GH-95782: Keep return value non-negative + return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) def seek(self, pos, whence=0): if whence not in valid_seek_flags: diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index cc387af..5491c05 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -257,6 +257,27 @@ class PyMockUnseekableIO(MockUnseekableIO, pyio.BytesIO): UnsupportedOperation = pyio.UnsupportedOperation +class MockCharPseudoDevFileIO(MockFileIO): + # GH-95782 + # ftruncate() does not work on these special files (and CPython then raises + # appropriate exceptions), so truncate() does not have to be accounted for + # here. + def __init__(self, data): + super().__init__(data) + + def seek(self, *args): + return 0 + + def tell(self, *args): + return 0 + +class CMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, io.BytesIO): + pass + +class PyMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, pyio.BytesIO): + pass + + class MockNonBlockWriterIO: def __init__(self): @@ -1648,6 +1669,30 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests): self.assertRaises(self.UnsupportedOperation, bufio.truncate) self.assertRaises(self.UnsupportedOperation, bufio.truncate, 0) + def test_tell_character_device_file(self): + # GH-95782 + # For the (former) bug in BufferedIO to manifest, the wrapped IO obj + # must be able to produce at least 2 bytes. + raw = self.MockCharPseudoDevFileIO(b"12") + buf = self.tp(raw) + self.assertEqual(buf.tell(), 0) + self.assertEqual(buf.read(1), b"1") + self.assertEqual(buf.tell(), 0) + + def test_seek_character_device_file(self): + raw = self.MockCharPseudoDevFileIO(b"12") + buf = self.tp(raw) + self.assertEqual(buf.seek(0, io.SEEK_CUR), 0) + self.assertEqual(buf.seek(1, io.SEEK_SET), 0) + self.assertEqual(buf.seek(0, io.SEEK_CUR), 0) + self.assertEqual(buf.read(1), b"1") + + # In the C implementation, tell() sets the BufferedIO's abs_pos to 0, + # which means that the next seek() could return a negative offset if it + # does not sanity-check: + self.assertEqual(buf.tell(), 0) + self.assertEqual(buf.seek(0, io.SEEK_CUR), 0) + class CBufferedReaderTest(BufferedReaderTest, SizeofTest): tp = io.BufferedReader @@ -4880,7 +4925,7 @@ def load_tests(loader, tests, pattern): # classes in the __dict__ of each test. mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO, MockNonBlockWriterIO, MockUnseekableIO, MockRawIOWithoutRead, - SlowFlushRawIO) + SlowFlushRawIO, MockCharPseudoDevFileIO) all_members = io.__all__ c_io_ns = {name : getattr(io, name) for name in all_members} py_io_ns = {name : getattr(pyio, name) for name in all_members} |