diff options
-rw-r--r-- | Doc/library/io.rst | 78 | ||||
-rw-r--r-- | Lib/test/test_io.py | 35 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 68 |
4 files changed, 155 insertions, 30 deletions
diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 22437cb..9fb241d 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -54,12 +54,6 @@ In-memory text streams are also available as :class:`StringIO` objects:: The text stream API is described in detail in the documentation for the :class:`TextIOBase`. -.. note:: - - Text I/O over a binary storage (such as a file) is significantly slower than - binary I/O over the same storage. This can become noticeable if you handle - huge amounts of text data (for example very large log files). - Binary I/O ^^^^^^^^^^ @@ -506,8 +500,8 @@ Raw File I/O Buffered Streams ^^^^^^^^^^^^^^^^ -In many situations, buffered I/O streams will provide higher performance -(bandwidth and latency) than raw I/O streams. Their API is also more usable. +Buffered I/O streams provide a higher-level interface to an I/O device +than raw I/O does. .. class:: BytesIO([initial_bytes]) @@ -766,14 +760,72 @@ Text I/O # .getvalue() will now raise an exception. output.close() - .. note:: - - :class:`StringIO` uses a native text storage and doesn't suffer from the - performance issues of other text streams, such as those based on - :class:`TextIOWrapper`. .. class:: IncrementalNewlineDecoder A helper codec that decodes newlines for universal newlines mode. It inherits :class:`codecs.IncrementalDecoder`. + +Advanced topics +--------------- + +Here we will discuss several advanced topics pertaining to the concrete +I/O implementations described above. + +Performance +^^^^^^^^^^^ + +Binary I/O +"""""""""" + +By reading and writing only large chunks of data even when the user asks +for a single byte, buffered I/O is designed to hide any inefficiency in +calling and executing the operating system's unbuffered I/O routines. The +gain will vary very much depending on the OS and the kind of I/O which is +performed (for example, on some contemporary OSes such as Linux, unbuffered +disk I/O can be as fast as buffered I/O). The bottom line, however, is +that buffered I/O will offer you predictable performance regardless of the +platform and the backing device. Therefore, it is most always preferable to +use buffered I/O rather than unbuffered I/O. + +Text I/O +"""""""" + +Text I/O over a binary storage (such as a file) is significantly slower than +binary I/O over the same storage, because it implies conversions from +unicode to binary data using a character codec. This can become noticeable +if you handle huge amounts of text data (for example very large log files). + +:class:`StringIO`, however, is a native in-memory unicode container and will +exhibit similar speed to :class:`BytesIO`. + +Multi-threading +^^^^^^^^^^^^^^^ + +:class:`FileIO` objects are thread-safe to the extent that the operating +system calls (such as ``read(2)`` under Unix) they are wrapping are thread-safe +too. + +Binary buffered objects (instances of :class:`BufferedReader`, +:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`) +protect their internal structures using a lock; it is therefore safe to call +them from multiple threads at once. + +:class:`TextIOWrapper` objects are not thread-safe. + +Reentrancy +^^^^^^^^^^ + +Binary buffered objects (instances of :class:`BufferedReader`, +:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`) +are not reentrant. While reentrant calls will not happen in normal situations, +they can arise if you are doing I/O in a :mod:`signal` handler. If it is +attempted to enter a buffered object again while already being accessed +*from the same thread*, then a :exc:`RuntimeError` is raised. + +The above implicitly extends to text files, since the :func:`open()` +function will wrap a buffered object inside a :class:`TextIOWrapper`. This +includes standard streams and therefore affects the built-in function +:func:`print()` as well. + diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index acf90df..ab8479d 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2560,12 +2560,47 @@ class SignalsTest(unittest.TestCase): def test_interrupted_write_text(self): self.check_interrupted_write("xy", b"xy", mode="w", encoding="ascii") + def check_reentrant_write(self, data, **fdopen_kwargs): + def on_alarm(*args): + # Will be called reentrantly from the same thread + wio.write(data) + 1/0 + signal.signal(signal.SIGALRM, on_alarm) + r, w = os.pipe() + wio = self.io.open(w, **fdopen_kwargs) + try: + signal.alarm(1) + # Either the reentrant call to wio.write() fails with RuntimeError, + # or the signal handler raises ZeroDivisionError. + with self.assertRaises((ZeroDivisionError, RuntimeError)) as cm: + while 1: + for i in range(100): + wio.write(data) + wio.flush() + # Make sure the buffer doesn't fill up and block further writes + os.read(r, len(data) * 100) + finally: + wio.close() + os.close(r) + + def test_reentrant_write_buffered(self): + self.check_reentrant_write(b"xy", mode="wb") + + def test_reentrant_write_text(self): + self.check_reentrant_write("xy", mode="w", encoding="ascii") + + class CSignalsTest(SignalsTest): io = io class PySignalsTest(SignalsTest): io = pyio + # Handling reentrancy issues would slow down _pyio even more, so the + # tests are disabled. + test_reentrant_write_buffered = None + test_reentrant_write_text = None + def test_main(): tests = (CIOTest, PyIOTest, @@ -13,6 +13,10 @@ Core and Builtins Library ------- +- Issue #10478: Reentrant calls inside buffered IO objects (for example by + way of a signal handler) now raise a RuntimeError instead of freezing the + current process. + - Issue #10464: netrc now correctly handles lines with embedded '#' characters. diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 611dc8c..11bc0b6 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -224,6 +224,7 @@ typedef struct { #ifdef WITH_THREAD PyThread_type_lock lock; + volatile long owner; #endif Py_ssize_t buffer_size; @@ -259,15 +260,33 @@ typedef struct { /* These macros protect the buffered object against concurrent operations. */ #ifdef WITH_THREAD -#define ENTER_BUFFERED(self) \ - Py_BEGIN_ALLOW_THREADS \ - PyThread_acquire_lock(self->lock, 1); \ +static int +_enter_buffered_busy(buffered *self) +{ + if (self->owner == PyThread_get_thread_ident()) { + PyErr_Format(PyExc_RuntimeError, + "reentrant call inside %R", self); + return 0; + } + Py_BEGIN_ALLOW_THREADS + PyThread_acquire_lock(self->lock, 1); Py_END_ALLOW_THREADS + return 1; +} + +#define ENTER_BUFFERED(self) \ + ( (PyThread_acquire_lock(self->lock, 0) ? \ + 1 : _enter_buffered_busy(self)) \ + && (self->owner = PyThread_get_thread_ident(), 1) ) #define LEAVE_BUFFERED(self) \ - PyThread_release_lock(self->lock); + do { \ + self->owner = 0; \ + PyThread_release_lock(self->lock); \ + } while(0); + #else -#define ENTER_BUFFERED(self) +#define ENTER_BUFFERED(self) 1 #define LEAVE_BUFFERED(self) #endif @@ -423,7 +442,8 @@ buffered_close(buffered *self, PyObject *args) int r; CHECK_INITIALIZED(self) - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; r = buffered_closed(self); if (r < 0) @@ -436,7 +456,8 @@ buffered_close(buffered *self, PyObject *args) /* flush() will most probably re-take the lock, so drop it first */ LEAVE_BUFFERED(self) res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; if (res == NULL) { goto end; } @@ -639,6 +660,7 @@ _buffered_init(buffered *self) PyErr_SetString(PyExc_RuntimeError, "can't allocate read lock"); return -1; } + self->owner = 0; #endif /* Find out whether buffer_size is a power of 2 */ /* XXX is this optimization useful? */ @@ -665,7 +687,8 @@ buffered_flush(buffered *self, PyObject *args) CHECK_INITIALIZED(self) CHECK_CLOSED(self, "flush of closed file") - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; res = _bufferedwriter_flush_unlocked(self, 0); if (res != NULL && self->readable) { /* Rewind the raw stream so that its position corresponds to @@ -692,7 +715,8 @@ buffered_peek(buffered *self, PyObject *args) return NULL; } - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; if (self->writable) { res = _bufferedwriter_flush_unlocked(self, 1); @@ -727,7 +751,8 @@ buffered_read(buffered *self, PyObject *args) if (n == -1) { /* The number of bytes is unspecified, read until the end of stream */ - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; res = _bufferedreader_read_all(self); LEAVE_BUFFERED(self) } @@ -735,7 +760,8 @@ buffered_read(buffered *self, PyObject *args) res = _bufferedreader_read_fast(self, n); if (res == Py_None) { Py_DECREF(res); - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; res = _bufferedreader_read_generic(self, n); LEAVE_BUFFERED(self) } @@ -763,7 +789,8 @@ buffered_read1(buffered *self, PyObject *args) if (n == 0) return PyBytes_FromStringAndSize(NULL, 0); - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; if (self->writable) { res = _bufferedwriter_flush_unlocked(self, 1); @@ -819,7 +846,8 @@ buffered_readinto(buffered *self, PyObject *args) /* TODO: use raw.readinto() instead! */ if (self->writable) { - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; res = _bufferedwriter_flush_unlocked(self, 0); LEAVE_BUFFERED(self) if (res == NULL) @@ -863,7 +891,8 @@ _buffered_readline(buffered *self, Py_ssize_t limit) goto end_unlocked; } - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + goto end_unlocked; /* Now we try to get some more from the raw stream */ if (self->writable) { @@ -1013,7 +1042,8 @@ buffered_seek(buffered *self, PyObject *args) } } - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; /* Fallback: invoke raw seek() method and clear buffer */ if (self->writable) { @@ -1051,7 +1081,8 @@ buffered_truncate(buffered *self, PyObject *args) return NULL; } - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) + return NULL; if (self->writable) { res = _bufferedwriter_flush_unlocked(self, 0); @@ -1705,7 +1736,10 @@ bufferedwriter_write(buffered *self, PyObject *args) return NULL; } - ENTER_BUFFERED(self) + if (!ENTER_BUFFERED(self)) { + PyBuffer_Release(&buf); + return NULL; + } /* Fast path: the data to write can be fully buffered. */ if (!VALID_READ_BUFFER(self) && !VALID_WRITE_BUFFER(self)) { |