summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2015-04-13 18:02:33 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2015-04-13 18:02:33 (GMT)
commit: 56452eea39baa9d1864b2275b9e93cf37378af09 (patch)
tree: e2e7758ebbb55ef1a8539178a1941565e2ec63c2 /Lib
parent: 682d05528ecd5e4ccdbce5bc90edec1a4fd1d71d (diff)
parent: 85e3ee749c351ebe0ad1ec28856d64da50b13f20 (diff)
download: cpython-56452eea39baa9d1864b2275b9e93cf37378af09.zip
cpython-56452eea39baa9d1864b2275b9e93cf37378af09.tar.gz
cpython-56452eea39baa9d1864b2275b9e93cf37378af09.tar.bz2
Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/_pyio.py 26
-rw-r--r-- Lib/test/test_io.py 13
2 files changed, 28 insertions, 11 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 400a56a..50ad9ff 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -2275,6 +2275,19 @@ class TextIOWrapper(TextIOBase):
return buffer
def seek(self, cookie, whence=0):
+ def _reset_encoder(position):
+ """Reset the encoder (merely useful for proper BOM handling)"""
+ try:
+ encoder = self._encoder or self._get_encoder()
+ except LookupError:
+ # Sometimes the encoder doesn't exist
+ pass
+ else:
+ if position != 0:
+ encoder.setstate(0)
+ else:
+ encoder.reset()
+
if self.closed:
raise ValueError("tell on closed file")
if not self._seekable:
@@ -2295,6 +2308,7 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None
if self._decoder:
self._decoder.reset()
+ _reset_encoder(position)
return position
if whence != 0:
raise ValueError("unsupported whence (%r)" % (whence,))
@@ -2332,17 +2346,7 @@ class TextIOWrapper(TextIOBase):
raise OSError("can't restore logical file position")
self._decoded_chars_used = chars_to_skip
- # Finally, reset the encoder (merely useful for proper BOM handling)
- try:
- encoder = self._encoder or self._get_encoder()
- except LookupError:
- # Sometimes the encoder doesn't exist
- pass
- else:
- if cookie != 0:
- encoder.setstate(0)
- else:
- encoder.reset()
+ _reset_encoder(cookie)
return cookie
def read(self, size=None):
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 2d02a31..4d17821 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2730,6 +2730,19 @@ class TextIOWrapperTest(unittest.TestCase):
with self.open(filename, 'rb') as f:
self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
+ def test_seek_append_bom(self):
+ # Same test, but first seek to the start and then to the end
+ filename = support.TESTFN
+ for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+ with self.open(filename, 'w', encoding=charset) as f:
+ f.write('aaa')
+ with self.open(filename, 'a', encoding=charset) as f:
+ f.seek(0)
+ f.seek(0, self.SEEK_END)
+ f.write('xxx')
+ with self.open(filename, 'rb') as f:
+ self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
+
def test_errors_property(self):
with self.open(support.TESTFN, "w") as f:
self.assertEqual(f.errors, "strict")