summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Alexandre Vassalotti <alexandre@peadrop.com> 2008-01-07 18:30:48 (GMT)
committer: Alexandre Vassalotti <alexandre@peadrop.com> 2008-01-07 18:30:48 (GMT)
commit: a38f73b1bb327ceb1aad3a3001255ab81da91c22 (patch)
tree: e1bc52230da9265f8b809b049f2499819047942f /Lib
parent: 52d168a9950cc0933b6f650e5fdebfad13347e88 (diff)
download: cpython-a38f73b1bb327ceb1aad3a3001255ab81da91c22.zip
cpython-a38f73b1bb327ceb1aad3a3001255ab81da91c22.tar.gz
cpython-a38f73b1bb327ceb1aad3a3001255ab81da91c22.tar.bz2
Fix issue1753: TextIOWrapper.write writes utf BOM for every string.
Patch by Erick Tryzelaar, with slight modifications by me.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/io.py 12
-rw-r--r-- Lib/test/test_io.py 18
2 files changed, 26 insertions, 4 deletions
diff --git a/Lib/io.py b/Lib/io.py
index 2a5348d..e427fe6 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -1182,6 +1182,7 @@ class TextIOWrapper(TextIOBase):
self._readnl = newline
self._writetranslate = newline != ''
self._writenl = newline or os.linesep
+ self._encoder = None
self._decoder = None
self._pending = ""
self._snapshot = None
@@ -1240,8 +1241,9 @@ class TextIOWrapper(TextIOBase):
haslf = (self._writetranslate or self._line_buffering) and "\n" in s
if haslf and self._writetranslate and self._writenl != "\n":
s = s.replace("\n", self._writenl)
+ encoder = self._encoder or self._get_encoder()
# XXX What if we were just reading?
- b = s.encode(self._encoding, self._errors)
+ b = encoder.encode(s)
self.buffer.write(b)
if self._line_buffering and (haslf or "\r" in s):
self.flush()
@@ -1250,11 +1252,13 @@ class TextIOWrapper(TextIOBase):
self._decoder.reset()
return length
+ def _get_encoder(self):
+ make_encoder = codecs.getincrementalencoder(self._encoding)
+ self._encoder = make_encoder(self._errors)
+ return self._encoder
+
def _get_decoder(self):
make_decoder = codecs.getincrementaldecoder(self._encoding)
- if make_decoder is None:
- raise IOError("Can't find an incremental decoder for encoding %s" %
- self._encoding)
decoder = make_decoder(self._errors)
if self._readuniversal:
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 33b32e0..4963416 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -765,6 +765,24 @@ class TextIOWrapperTest(unittest.TestCase):
f.readline()
f.tell()
+ def testEncodedWrites(self):
+ data = "1234567890"
+ tests = ("utf-16",
+ "utf-16-le",
+ "utf-16-be",
+ "utf-32",
+ "utf-32-le",
+ "utf-32-be")
+ for encoding in tests:
+ buf = io.BytesIO()
+ f = io.TextIOWrapper(buf, encoding=encoding)
+ # Check if the BOM is written only once (see issue1753).
+ f.write(data)
+ f.write(data)
+ f.seek(0)
+ self.assertEquals(f.read(), data * 2)
+ self.assertEquals(buf.getvalue(), (data * 2).encode(encoding))
+
def timingTest(self):
timer = time.time
enc = "utf8"