Issue #5006: Better handling of Unicode byte-order marks (BOM) in the io library.

This means, for example, that opening a UTF-16 text file in append mode doesn't add a BOM at the end of the file if the file isn't empty.
author: Antoine Pitrou <solipsis@pitrou.net> 2009-05-14 18:55:55 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2009-05-14 18:55:55 (GMT)
commit: e450185b4ad645d4f72cbd4b2139d6a987edc84d (patch)
tree: d588925c1710f0404f9ac61058a79a5b33382408 /Lib/test
parent: b565577aa722d8b39aa42da0384f776680c03c36 (diff)
download: cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.zip
cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.gz
cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.bz2
1 files changed, 31 insertions, 0 deletions
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 1a525dc..98dc711 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -1963,6 +1963,37 @@ class TextIOWrapperTest(unittest.TestCase):
 
         self.assertEqual(buffer.seekable(), txt.seekable())
 
+    def test_append_bom(self):
+        # Appending to a non-empty file must not write the BOM a second time
+        filename = support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with self.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+                pos = f.tell()
+            with self.open(filename, 'rb') as f:
+                self.assertEqual(f.read(), 'aaa'.encode(charset))
+
+            with self.open(filename, 'a', encoding=charset) as f:
+                f.write('xxx')
+            with self.open(filename, 'rb') as f:
+                self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
+
+    def test_seek_bom(self):
+        # Writing after a manual seek() must not leave an extra BOM in the file
+        filename = support.TESTFN
+        for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+            with self.open(filename, 'w', encoding=charset) as f:
+                f.write('aaa')
+                pos = f.tell()
+            with self.open(filename, 'r+', encoding=charset) as f:
+                f.seek(pos)
+                f.write('zzz')
+                f.seek(0)
+                f.write('bbb')
+            with self.open(filename, 'rb') as f:
+                self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
+
+
 class CTextIOWrapperTest(TextIOWrapperTest):
 
     def test_initialization(self):
author	Antoine Pitrou <solipsis@pitrou.net>	2009-05-14 18:55:55 (GMT)
committer	Antoine Pitrou <solipsis@pitrou.net>	2009-05-14 18:55:55 (GMT)
commit	e450185b4ad645d4f72cbd4b2139d6a987edc84d (patch)
tree	d588925c1710f0404f9ac61058a79a5b33382408 /Lib/test
parent	b565577aa722d8b39aa42da0384f776680c03c36 (diff)
download	cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.zip cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.gz cpython-e450185b4ad645d4f72cbd4b2139d6a987edc84d.tar.bz2