From 85e3ee749c351ebe0ad1ec28856d64da50b13f20 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 13 Apr 2015 20:01:21 +0200 Subject: Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file. --- Lib/_pyio.py | 26 +++++++++++++++----------- Lib/test/test_io.py | 13 +++++++++++++ Misc/NEWS | 3 +++ Modules/_io/textio.c | 25 +++++++++++++++++++++---- 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 3ed02e4..c0b5fd1 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1865,6 +1865,19 @@ class TextIOWrapper(TextIOBase): return buffer def seek(self, cookie, whence=0): + def _reset_encoder(position): + """Reset the encoder (merely useful for proper BOM handling)""" + try: + encoder = self._encoder or self._get_encoder() + except LookupError: + # Sometimes the encoder doesn't exist + pass + else: + if position != 0: + encoder.setstate(0) + else: + encoder.reset() + if self.closed: raise ValueError("tell on closed file") if not self._seekable: @@ -1885,6 +1898,7 @@ class TextIOWrapper(TextIOBase): self._snapshot = None if self._decoder: self._decoder.reset() + _reset_encoder(position) return position if whence != 0: raise ValueError("unsupported whence (%r)" % (whence,)) @@ -1922,17 +1936,7 @@ class TextIOWrapper(TextIOBase): raise OSError("can't restore logical file position") self._decoded_chars_used = chars_to_skip - # Finally, reset the encoder (merely useful for proper BOM handling) - try: - encoder = self._encoder or self._get_encoder() - except LookupError: - # Sometimes the encoder doesn't exist - pass - else: - if cookie != 0: - encoder.setstate(0) - else: - encoder.reset() + _reset_encoder(cookie) return cookie def read(self, size=None): diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index dfa3d77..ea109ac 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2669,6 +2669,19 @@ class TextIOWrapperTest(unittest.TestCase): with self.open(filename, 'rb') as f: self.assertEqual(f.read(), 'bbbzzz'.encode(charset)) + def test_seek_append_bom(self): + # Same test, but first seek to the start and then to the end + filename = support.TESTFN + for charset in ('utf-8-sig', 'utf-16', 'utf-32'): + with self.open(filename, 'w', encoding=charset) as f: + f.write('aaa') + with self.open(filename, 'a', encoding=charset) as f: + f.seek(0) + f.seek(0, self.SEEK_END) + f.write('xxx') + with self.open(filename, 'rb') as f: + self.assertEqual(f.read(), 'aaaxxx'.encode(charset)) + def test_errors_property(self): with self.open(support.TESTFN, "w") as f: self.assertEqual(f.errors, "strict") diff --git a/Misc/NEWS b/Misc/NEWS index fe91ae2..6ed85ef 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -29,6 +29,9 @@ Core and Builtins Library ------- +- Issue #22982: Improve BOM handling when seeking to multiple positions of + a writable text file. + - Issue #23865: close() methods in multiple modules now are idempotent and more robust at shutdown. If needs to release multiple resources, they are released even if errors are occured. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d1c0d01..b419275 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -2042,11 +2042,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) } static int -_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) +_textiowrapper_encoder_reset(textio *self, int start_of_stream) { PyObject *res; - /* Same as _textiowrapper_decoder_setstate() above. */ - if (cookie->start_pos == 0 && cookie->dec_flags == 0) { + if (start_of_stream) { res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL); self->encoding_start_of_stream = 1; } @@ -2061,6 +2060,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) return 0; } +static int +_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) +{ + /* Same as _textiowrapper_decoder_setstate() above. */ + return _textiowrapper_encoder_reset( + self, cookie->start_pos == 0 && cookie->dec_flags == 0); +} + static PyObject * textiowrapper_seek(textio *self, PyObject *args) { @@ -2128,7 +2135,17 @@ textiowrapper_seek(textio *self, PyObject *args) } res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2); - Py_XDECREF(cookieObj); + Py_CLEAR(cookieObj); + if (res == NULL) + goto fail; + if (self->encoder) { + /* If seek() == 0, we are at the start of stream, otherwise not */ + cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ); + if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) { + Py_DECREF(res); + goto fail; + } + } return res; } else if (whence != 0) { -- cgit v0.12