diff options
-rw-r--r-- | Lib/test/test_file_eintr.py | 236 | ||||
-rw-r--r-- | Lib/test/test_io.py | 10 | ||||
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 5 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 8 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 7 | ||||
-rw-r--r-- | Modules/_io/iobase.c | 23 | ||||
-rw-r--r-- | Modules/_io/textio.c | 16 |
8 files changed, 295 insertions, 15 deletions
diff --git a/Lib/test/test_file_eintr.py b/Lib/test/test_file_eintr.py new file mode 100644 index 0000000..b4e18ce --- /dev/null +++ b/Lib/test/test_file_eintr.py @@ -0,0 +1,236 @@ +# Written to test interrupted system calls interfering with our many buffered +# IO implementations. http://bugs.python.org/issue12268 +# +# It was suggested that this code could be merged into test_io and the tests +# made to work using the same method as the existing signal tests in test_io. +# I was unable to get single process tests using alarm or setitimer that way +# to reproduce the EINTR problems. This process based test suite reproduces +# the problems prior to the issue12268 patch reliably on Linux and OSX. +# - gregory.p.smith + +import os +import select +import signal +import subprocess +import sys +from test.support import run_unittest +import time +import unittest + +# Test import all of the things we're about to try testing up front. +from _io import FileIO + + +@unittest.skipUnless(os.name == 'posix', 'tests requires a posix system.') +class TestFileIOSignalInterrupt(unittest.TestCase): + def setUp(self): + self._process = None + + def tearDown(self): + if self._process and self._process.poll() is None: + try: + self._process.kill() + except OSError: + pass + + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code for the reader process. + + subclasseses should override this to test different IO objects. + """ + return ('import _io ;' + 'infile = _io.FileIO(sys.stdin.fileno(), "rb")') + + def fail_with_process_info(self, why, stdout=b'', stderr=b'', + communicate=True): + """A common way to cleanup and fail with useful debug output. + + Kills the process if it is still running, collects remaining output + and fails the test with an error message including the output. + + Args: + why: Text to go after "Error from IO process" in the message. + stdout, stderr: standard output and error from the process so + far to include in the error message. + communicate: bool, when True we call communicate() on the process + after killing it to gather additional output. + """ + if self._process.poll() is None: + time.sleep(0.1) # give it time to finish printing the error. + try: + self._process.terminate() # Ensure it dies. + except OSError: + pass + if communicate: + stdout_end, stderr_end = self._process.communicate() + stdout += stdout_end + stderr += stderr_end + self.fail('Error from IO process %s:\nSTDOUT:\n%sSTDERR:\n%s\n' % + (why, stdout.decode(), stderr.decode())) + + def _test_reading(self, data_to_write, read_and_verify_code): + """Generic buffered read method test harness to validate EINTR behavior. + + Also validates that Python signal handlers are run during the read. + + Args: + data_to_write: String to write to the child process for reading + before sending it a signal, confirming the signal was handled, + writing a final newline and closing the infile pipe. + read_and_verify_code: Single "line" of code to read from a file + object named 'infile' and validate the result. This will be + executed as part of a python subprocess fed data_to_write. + """ + infile_setup_code = self._generate_infile_setup_code() + # Total pipe IO in this function is smaller than the minimum posix OS + # pipe buffer size of 512 bytes. No writer should block. + assert len(data_to_write) < 512, 'data_to_write must fit in pipe buf.' + + # Start a subprocess to call our read method while handling a signal. + self._process = subprocess.Popen( + [sys.executable, '-u', '-c', + 'import signal, sys ;' + 'signal.signal(signal.SIGINT, ' + 'lambda s, f: sys.stderr.write("$\\n")) ;' + + infile_setup_code + ' ;' + + 'sys.stderr.write("Worm Sign!\\n") ;' + + read_and_verify_code + ' ;' + + 'infile.close()' + ], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + # Wait for the signal handler to be installed. + worm_sign = self._process.stderr.read(len(b'Worm Sign!\n')) + if worm_sign != b'Worm Sign!\n': # See also, Dune by Frank Herbert. + self.fail_with_process_info('while awaiting a sign', + stderr=worm_sign) + self._process.stdin.write(data_to_write) + + signals_sent = 0 + rlist = [] + # We don't know when the read_and_verify_code in our child is actually + # executing within the read system call we want to interrupt. This + # loop waits for a bit before sending the first signal to increase + # the likelihood of that. Implementations without correct EINTR + # and signal handling usually fail this test. + while not rlist: + rlist, _, _ = select.select([self._process.stderr], (), (), 0.05) + self._process.send_signal(signal.SIGINT) + signals_sent += 1 + if signals_sent > 200: + self._process.kill() + self.fail('reader process failed to handle our signals.') + # This assumes anything unexpected that writes to stderr will also + # write a newline. That is true of the traceback printing code. + signal_line = self._process.stderr.readline() + if signal_line != b'$\n': + self.fail_with_process_info('while awaiting signal', + stderr=signal_line) + + # We append a newline to our input so that a readline call can + # end on its own before the EOF is seen and so that we're testing + # the read call that was interrupted by a signal before the end of + # the data stream has been reached. + stdout, stderr = self._process.communicate(input=b'\n') + if self._process.returncode: + self.fail_with_process_info( + 'exited rc=%d' % self._process.returncode, + stdout, stderr, communicate=False) + # PASS! + + # String format for the read_and_verify_code used by read methods. + _READING_CODE_TEMPLATE = ( + 'got = infile.{read_method_name}() ;' + 'expected = {expected!r} ;' + 'assert got == expected, (' + '"{read_method_name} returned wrong data.\\n"' + '"got data %r\\nexpected %r" % (got, expected))' + ) + + def test_readline(self): + """readline() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello, world!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readline', + expected=b'hello, world!\n')) + + def test_readlines(self): + """readlines() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readlines', + expected=[b'hello\n', b'world!\n'])) + + def test_readall(self): + """readall() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readall', + expected=b'hello\nworld!\n')) + # read() is the same thing as readall(). + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected=b'hello\nworld!\n')) + + +class TestBufferedIOSignalInterrupt(TestFileIOSignalInterrupt): + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code to make a BufferedReader.""" + return ('infile = open(sys.stdin.fileno(), "rb") ;' + 'import _io ;assert isinstance(infile, _io.BufferedReader)') + + def test_readall(self): + """BufferedReader.read() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected=b'hello\nworld!\n')) + + +class TestTextIOSignalInterrupt(TestFileIOSignalInterrupt): + def _generate_infile_setup_code(self): + """Returns the infile = ... line of code to make a TextIOWrapper.""" + return ('infile = open(sys.stdin.fileno(), "rt", newline=None) ;' + 'import _io ;assert isinstance(infile, _io.TextIOWrapper)') + + def test_readline(self): + """readline() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello, world!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readline', + expected='hello, world!\n')) + + def test_readlines(self): + """readlines() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\r\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='readlines', + expected=['hello\n', 'world!\n'])) + + def test_readall(self): + """read() must handle signals and not lose data.""" + self._test_reading( + data_to_write=b'hello\nworld!', + read_and_verify_code=self._READING_CODE_TEMPLATE.format( + read_method_name='read', + expected="hello\nworld!\n")) + + +def test_main(): + test_cases = [ + tc for tc in globals().values() + if isinstance(tc, type) and issubclass(tc, unittest.TestCase)] + run_unittest(*test_cases) + + +if __name__ == '__main__': + test_main() diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 54ba179..dfc7c69 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2912,7 +2912,7 @@ class SignalsTest(unittest.TestCase): try: wio = self.io.open(w, **fdopen_kwargs) t.start() - signal.alarm(1) + signal.setitimer(signal.ITIMER_REAL, 0.1) # Fill the pipe enough that the write will be blocking. # It will be interrupted by the timer armed above. Since the # other thread has read one byte, the low-level write will @@ -2957,7 +2957,7 @@ class SignalsTest(unittest.TestCase): r, w = os.pipe() wio = self.io.open(w, **fdopen_kwargs) try: - signal.alarm(1) + signal.setitimer(signal.ITIMER_REAL, 0.1) # Either the reentrant call to wio.write() fails with RuntimeError, # or the signal handler raises ZeroDivisionError. with self.assertRaises((ZeroDivisionError, RuntimeError)) as cm: @@ -2992,7 +2992,7 @@ class SignalsTest(unittest.TestCase): try: rio = self.io.open(r, **fdopen_kwargs) os.write(w, b"foo") - signal.alarm(1) + signal.setitimer(signal.ITIMER_REAL, 0.1) # Expected behaviour: # - first raw read() returns partial b"foo" # - second raw read() returns EINTR @@ -3036,13 +3036,13 @@ class SignalsTest(unittest.TestCase): t.daemon = True def alarm1(sig, frame): signal.signal(signal.SIGALRM, alarm2) - signal.alarm(1) + signal.setitimer(signal.ITIMER_REAL, 0.1) def alarm2(sig, frame): t.start() signal.signal(signal.SIGALRM, alarm1) try: wio = self.io.open(w, **fdopen_kwargs) - signal.alarm(1) + signal.setitimer(signal.ITIMER_REAL, 0.1) # Expected behaviour: # - first raw write() is partial (because of the limited pipe buffer # and the first alarm) @@ -10,6 +10,11 @@ What's New in Python 3.3.0 Beta 1? Core and Builtins ----------------- +- Issue #12268: File readline, readlines and read() or readall() methods + no longer lose data when an underlying read system call is interrupted. + IOError is no longer raised due to a read system call returning EINTR + from within these methods. + - Issue #11626: Add _SizeT functions to stable ABI. - Issue #15146: Add PyType_FromSpecWithBases. Patch by Robin Schreiber. diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 987aac8..0fe90a3 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -57,6 +57,11 @@ extern Py_ssize_t _PyIO_find_line_ending( int translated, int universal, PyObject *readnl, int kind, char *start, char *end, Py_ssize_t *consumed); +/* Return 1 if an EnvironmentError with errno == EINTR is set (and then + clears the error indicator), 0 otherwise. + Should only be called when PyErr_Occurred() is true. +*/ +extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index dc723b1..21393a3 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -746,8 +746,8 @@ _buffered_init(buffered *self) clears the error indicator), 0 otherwise. Should only be called when PyErr_Occurred() is true. */ -static int -_trap_eintr(void) +int +_PyIO_trap_eintr(void) { static PyObject *eintr_int = NULL; PyObject *typ, *val, *tb; @@ -1396,7 +1396,7 @@ _bufferedreader_raw_read(buffered *self, char *start, Py_ssize_t len) */ do { res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_readinto, memobj, NULL); - } while (res == NULL && _trap_eintr()); + } while (res == NULL && _PyIO_trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; @@ -1850,7 +1850,7 @@ _bufferedwriter_raw_write(buffered *self, char *start, Py_ssize_t len) errno = 0; res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_write, memobj, NULL); errnum = errno; - } while (res == NULL && _trap_eintr()); + } while (res == NULL && _PyIO_trap_eintr()); Py_DECREF(memobj); if (res == NULL) return -1; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 226a5d2..198392f 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -670,6 +670,13 @@ fileio_readall(fileio *self) if (n == 0) break; if (n < 0) { + if (errno == EINTR) { + if (PyErr_CheckSignals()) { + Py_DECREF(result); + return NULL; + } + continue; + } if (total > 0) break; if (errno == EAGAIN) { diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index b30bbb6..dd052ae 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -474,11 +474,15 @@ iobase_readline(PyObject *self, PyObject *args) PyObject *b; if (has_peek) { - _Py_IDENTIFIER(peek); PyObject *readahead = _PyObject_CallMethodId(self, &PyId_peek, "i", 1); - - if (readahead == NULL) + if (readahead == NULL) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (!PyBytes_Check(readahead)) { PyErr_Format(PyExc_IOError, "peek() should have returned a bytes object, " @@ -511,8 +515,14 @@ iobase_readline(PyObject *self, PyObject *args) } b = _PyObject_CallMethodId(self, &PyId_read, "n", nreadahead); - if (b == NULL) + if (b == NULL) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (!PyBytes_Check(b)) { PyErr_Format(PyExc_IOError, "read() should have returned a bytes object, " @@ -827,6 +837,11 @@ rawiobase_readall(PyObject *self, PyObject *args) PyObject *data = _PyObject_CallMethodId(self, &PyId_read, "i", DEFAULT_BUFFER_SIZE); if (!data) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } Py_DECREF(chunks); return NULL; } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 287f165..d390d5a 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1560,8 +1560,14 @@ textiowrapper_read(textio *self, PyObject *args) /* Keep reading chunks until we have n characters to return */ while (remaining > 0) { res = textiowrapper_read_chunk(self, remaining); - if (res < 0) + if (res < 0) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto fail; + } if (res == 0) /* EOF */ break; if (chunks == NULL) { @@ -1728,8 +1734,14 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit) while (!self->decoded_chars || !PyUnicode_GET_LENGTH(self->decoded_chars)) { res = textiowrapper_read_chunk(self, 0); - if (res < 0) + if (res < 0) { + /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() + when EINTR occurs so we needn't do it ourselves. */ + if (_PyIO_trap_eintr()) { + continue; + } goto error; + } if (res == 0) break; } |