summaryrefslogtreecommitdiffstats
path: root/Lib/_pyio.py
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2009-05-14 18:55:55 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2009-05-14 18:55:55 (GMT)
commite450185b4ad645d4f72cbd4b2139d6a987edc84d (patch)
treed588925c1710f0404f9ac61058a79a5b33382408 /Lib/_pyio.py
parentb565577aa722d8b39aa42da0384f776680c03c36 (diff)
downloadcpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.zip
cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.gz
cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.bz2
Issue #5006: Better handling of Unicode byte-order marks (BOM) in the io library.
This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.
Diffstat (limited to 'Lib/_pyio.py')
-rw-r--r--Lib/_pyio.py20
1 files changed, 20 insertions, 0 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index e3e7c3d..c9a7c5e 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -1436,6 +1436,15 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable()
+ if self._seekable and self.writable():
+ position = self.buffer.tell()
+ if position != 0:
+ try:
+ self._get_encoder().setstate(0)
+ except LookupError:
+ # Sometimes the encoder doesn't exist
+ pass
+
# self._snapshot is either None, or a tuple (dec_flags, next_input)
# where dec_flags is the second (integer) item of the decoder state
# and next_input is the chunk of input bytes that comes next after the
@@ -1741,6 +1750,17 @@ class TextIOWrapper(TextIOBase):
raise IOError("can't restore logical file position")
self._decoded_chars_used = chars_to_skip
+ # Finally, reset the encoder (merely useful for proper BOM handling)
+ try:
+ encoder = self._encoder or self._get_encoder()
+ except LookupError:
+ # Sometimes the encoder doesn't exist
+ pass
+ else:
+ if cookie != 0:
+ encoder.setstate(0)
+ else:
+ encoder.reset()
return cookie
def read(self, n=None):