diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2009-05-14 18:55:55 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2009-05-14 18:55:55 (GMT) |
commit | e450185b4ad645d4f72cbd4b2139d6a987edc84d (patch) | |
tree | d588925c1710f0404f9ac61058a79a5b33382408 /Lib/_pyio.py | |
parent | b565577aa722d8b39aa42da0384f776680c03c36 (diff) | |
download | cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.zip cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.gz cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.bz2 |
Issue #5006: Better handling of unicode byte-order marks (BOM) in the io library.
This means, for example, that opening a UTF-16 text file in
append mode doesn't add a BOM at the end of the file if the file isn't
empty.
Diffstat (limited to 'Lib/_pyio.py')
-rw-r--r-- | Lib/_pyio.py | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/Lib/_pyio.py b/Lib/_pyio.py index e3e7c3d..c9a7c5e 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -1436,6 +1436,15 @@ class TextIOWrapper(TextIOBase): self._snapshot = None # info for reconstructing decoder state self._seekable = self._telling = self.buffer.seekable() + if self._seekable and self.writable(): + position = self.buffer.tell() + if position != 0: + try: + self._get_encoder().setstate(0) + except LookupError: + # Sometimes the encoder doesn't exist + pass + # self._snapshot is either None, or a tuple (dec_flags, next_input) # where dec_flags is the second (integer) item of the decoder state # and next_input is the chunk of input bytes that comes next after the @@ -1741,6 +1750,17 @@ class TextIOWrapper(TextIOBase): raise IOError("can't restore logical file position") self._decoded_chars_used = chars_to_skip + # Finally, reset the encoder (merely useful for proper BOM handling) + try: + encoder = self._encoder or self._get_encoder() + except LookupError: + # Sometimes the encoder doesn't exist + pass + else: + if cookie != 0: + encoder.setstate(0) + else: + encoder.reset() return cookie def read(self, n=None): |