summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
author6t8k <58048945+6t8k@users.noreply.github.com>2024-02-17 11:16:06 (GMT)
committerGitHub <noreply@github.com>2024-02-17 11:16:06 (GMT)
commit26800cf25a0970d46934fa9a881c0ef6881d642b (patch)
tree94787f4406c4ab79490877d58612437787fd2068 /Lib
parentd5a30a1777f04523c7b151b894e999f5714d8e96 (diff)
downloadcpython-26800cf25a0970d46934fa9a881c0ef6881d642b.zip
cpython-26800cf25a0970d46934fa9a881c0ef6881d642b.tar.gz
cpython-26800cf25a0970d46934fa9a881c0ef6881d642b.tar.bz2
gh-95782: Fix io.BufferedReader.tell() etc. being able to return offsets < 0 (GH-99709)
lseek() always returns 0 for character pseudo-devices like `/dev/urandom` (for other non-regular files, e.g. `/dev/stdin`, it always returns -1, to which CPython reacts by raising appropriate exceptions). They are thus technically seekable despite not having seek semantics. When calling read() on e.g. an instance of `io.BufferedReader` that wraps such a file, `BufferedReader` reads ahead, filling its buffer, creating a discrepancy between the number of bytes read and the internal `tell()` always returning 0, which previously resulted in e.g. `BufferedReader.tell()` or `BufferedReader.seek()` being able to return positions < 0 even though these are supposed to be always >= 0. Invariably keep the return value non-negative by returning max(former_return_value, 0) instead, and add some corresponding tests.
Diffstat (limited to 'Lib')
-rw-r--r--Lib/_pyio.py3
-rw-r--r--Lib/test/test_io.py47
2 files changed, 48 insertions, 2 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 8a0d0dc..a3fede6 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1197,7 +1197,8 @@ class BufferedReader(_BufferedIOMixin):
return written
def tell(self):
- return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
+ # GH-95782: Keep return value non-negative
+ return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0)
def seek(self, pos, whence=0):
if whence not in valid_seek_flags:
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index cc387af..5491c05 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -257,6 +257,27 @@ class PyMockUnseekableIO(MockUnseekableIO, pyio.BytesIO):
UnsupportedOperation = pyio.UnsupportedOperation
+class MockCharPseudoDevFileIO(MockFileIO):
+ # GH-95782
+ # ftruncate() does not work on these special files (and CPython then raises
+ # appropriate exceptions), so truncate() does not have to be accounted for
+ # here.
+ def __init__(self, data):
+ super().__init__(data)
+
+ def seek(self, *args):
+ return 0
+
+ def tell(self, *args):
+ return 0
+
+class CMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, io.BytesIO):
+ pass
+
+class PyMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, pyio.BytesIO):
+ pass
+
+
class MockNonBlockWriterIO:
def __init__(self):
@@ -1648,6 +1669,30 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests):
self.assertRaises(self.UnsupportedOperation, bufio.truncate)
self.assertRaises(self.UnsupportedOperation, bufio.truncate, 0)
+ def test_tell_character_device_file(self):
+ # GH-95782
+ # For the (former) bug in BufferedIO to manifest, the wrapped IO obj
+ # must be able to produce at least 2 bytes.
+ raw = self.MockCharPseudoDevFileIO(b"12")
+ buf = self.tp(raw)
+ self.assertEqual(buf.tell(), 0)
+ self.assertEqual(buf.read(1), b"1")
+ self.assertEqual(buf.tell(), 0)
+
+ def test_seek_character_device_file(self):
+ raw = self.MockCharPseudoDevFileIO(b"12")
+ buf = self.tp(raw)
+ self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+ self.assertEqual(buf.seek(1, io.SEEK_SET), 0)
+ self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+ self.assertEqual(buf.read(1), b"1")
+
+ # In the C implementation, tell() sets the BufferedIO's abs_pos to 0,
+ # which means that the next seek() could return a negative offset if it
+ # does not sanity-check:
+ self.assertEqual(buf.tell(), 0)
+ self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+
class CBufferedReaderTest(BufferedReaderTest, SizeofTest):
tp = io.BufferedReader
@@ -4880,7 +4925,7 @@ def load_tests(loader, tests, pattern):
# classes in the __dict__ of each test.
mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO,
MockNonBlockWriterIO, MockUnseekableIO, MockRawIOWithoutRead,
- SlowFlushRawIO)
+ SlowFlushRawIO, MockCharPseudoDevFileIO)
all_members = io.__all__
c_io_ns = {name : getattr(io, name) for name in all_members}
py_io_ns = {name : getattr(pyio, name) for name in all_members}