diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2015-04-13 18:01:21 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2015-04-13 18:01:21 (GMT) |
commit | 85e3ee749c351ebe0ad1ec28856d64da50b13f20 (patch) | |
tree | 55dc2ddb8c1085bf4271a35e72d0697035426c4a | |
parent | 20d31b5182bebdf433c1b1c124377895333adbec (diff) | |
download | cpython-85e3ee749c351ebe0ad1ec28856d64da50b13f20.zip cpython-85e3ee749c351ebe0ad1ec28856d64da50b13f20.tar.gz cpython-85e3ee749c351ebe0ad1ec28856d64da50b13f20.tar.bz2 |
Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
-rw-r--r-- | Lib/_pyio.py | 26 | ||||
-rw-r--r-- | Lib/test/test_io.py | 13 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/_io/textio.c | 25 |
4 files changed, 52 insertions, 15 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 3ed02e4..c0b5fd1 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase):
         return buffer
 
     def seek(self, cookie, whence=0):
+        def _reset_encoder(position):
+            """Reset the encoder (merely useful for proper BOM handling)"""
+            try:
+                encoder = self._encoder or self._get_encoder()
+            except LookupError:
+                # Sometimes the encoder doesn't exist
+                pass
+            else:
+                if position != 0:
+                    encoder.setstate(0)
+                else:
+                    encoder.reset()
+
         if self.closed:
             raise ValueError("tell on closed file")
         if not self._seekable:
@@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase):
             self._snapshot = None
             if self._decoder:
                 self._decoder.reset()
+            _reset_encoder(position)
             return position
         if whence != 0:
             raise ValueError("unsupported whence (%r)" % (whence,))
@@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase):
                 raise OSError("can't restore logical file position")
             self._decoded_chars_used = chars_to_skip
 
-        # Finally, reset the encoder (merely useful for proper BOM handling)
-        try:
-            encoder = self._encoder or self._get_encoder()
-        except LookupError:
-            # Sometimes the encoder doesn't exist
-            pass
-        else:
-            if cookie != 0:
-                encoder.setstate(0)
-            else:
-                encoder.reset()
+        _reset_encoder(cookie)
         return cookie
 
     def read(self, size=None):
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index dfa3d77..ea109ac 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase):
             with self.open(filename, 'rb') as f:
                 self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
 
+    def test_seek_append_bom(self):
+        # Same test, but first seek to the start and then to the end
+        filename = support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with self.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+            with self.open(filename, 'a', encoding=charset) as f:
+                f.seek(0)
+                f.seek(0, self.SEEK_END)
+                f.write('xxx')
+            with self.open(filename, 'rb') as f:
+                self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
+
     def test_errors_property(self):
         with self.open(support.TESTFN, "w") as f:
             self.assertEqual(f.errors, "strict")
@@ -29,6 +29,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #22982: Improve BOM handling when seeking to multiple positions of
+  a writable text file.
+
 - Issue #23865: close() methods in multiple modules now are idempotent and more
   robust at shutdown. If needs to release multiple resources, they are released
   even if errors are occured.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
index d1c0d01..b419275 100644
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
 }
 
 static int
-_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+_textiowrapper_encoder_reset(textio *self, int start_of_stream)
 {
     PyObject *res;
-    /* Same as _textiowrapper_decoder_setstate() above. */
-    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
+    if (start_of_stream) {
         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
         self->encoding_start_of_stream = 1;
     }
@@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
     return 0;
 }
 
+static int
+_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+{
+    /* Same as _textiowrapper_decoder_setstate() above. */
+    return _textiowrapper_encoder_reset(
+        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
+}
+
 static PyObject *
 textiowrapper_seek(textio *self, PyObject *args)
 {
@@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args)
         }
 
         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
-        Py_XDECREF(cookieObj);
+        Py_CLEAR(cookieObj);
+        if (res == NULL)
+            goto fail;
+        if (self->encoder) {
+            /* If seek() == 0, we are at the start of stream, otherwise not */
+            cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
+            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
+                Py_DECREF(res);
+                goto fail;
+            }
+        }
         return res;
     }
     else if (whence != 0) {