diff options
author | Walter Dörwald <walter@livinglogic.de> | 2005-07-20 22:15:39 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2005-07-20 22:15:39 (GMT) |
commit | c9878e1b220b748788c3faa656257d5da4cd46c7 (patch) | |
tree | 93c82f0d65fecef02e121768f00181b4800fd2f6 | |
parent | 21c825417fc993d708c3ff57e2b8b97b09a20159 (diff) | |
download | cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.zip cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.tar.gz cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.tar.bz2 |
Make the attributes and local variables used by StreamReader str objects
instead of unicode objects, so that codecs that do a str->str decoding won't
have their result promoted to unicode. This fixes SF bug #1241507.
-rw-r--r-- | Lib/codecs.py | 12 | ||||
-rw-r--r-- | Lib/test/test_codecs.py | 17 |
2 files changed, 24 insertions, 5 deletions
```diff
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 08cf50b..0ffa382 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -229,7 +229,9 @@ class StreamReader(Codec):
         self.stream = stream
         self.errors = errors
         self.bytebuffer = ""
-        self.charbuffer = u""
+        # For str->str decoding this will stay a str
+        # For str->unicode decoding the first read will promote it to unicode
+        self.charbuffer = ""
 
     def decode(self, input, errors='strict'):
         raise NotImplementedError
@@ -284,7 +286,7 @@ class StreamReader(Codec):
         if chars < 0:
             # Return everything we've got
             result = self.charbuffer
-            self.charbuffer = u""
+            self.charbuffer = ""
         else:
             # Return the first chars characters
             result = self.charbuffer[:chars]
@@ -301,7 +303,7 @@ class StreamReader(Codec):
 
         """
         readsize = size or 72
-        line = u""
+        line = ""
         # If size is given, we call read() only once
         while True:
             data = self.read(readsize)
@@ -309,7 +311,7 @@ class StreamReader(Codec):
                 # If we're at a "\r" read one extra character (which might
                 # be a "\n") to get a proper line ending. If the stream is
                 # temporarily exhausted we return the wrong line ending.
-                if data.endswith(u"\r"):
+                if data.endswith("\r"):
                     data += self.read(size=1, chars=1)
 
             line += data
@@ -319,7 +321,7 @@ class StreamReader(Codec):
                 line0withoutend = lines[0].splitlines(False)[0]
                 if line0withend != line0withoutend: # We really have a line end
                     # Put the rest back together and keep it until the next call
-                    self.charbuffer = u"".join(lines[1:]) + self.charbuffer
+                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
                     if keepends:
                         line = line0withend
                     else:
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 4154ad2..5f799e0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -663,6 +663,22 @@ class StreamReaderTest(unittest.TestCase):
         f = self.reader(self.stream)
         self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
 
+class Str2StrTest(unittest.TestCase):
+
+    def test_read(self):
+        sin = "\x80".encode("base64_codec")
+        reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
+        sout = reader.read()
+        self.assertEqual(sout, "\x80")
+        self.assert_(isinstance(sout, str))
+
+    def test_readline(self):
+        sin = "\x80".encode("base64_codec")
+        reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
+        sout = reader.readline()
+        self.assertEqual(sout, "\x80")
+        self.assert_(isinstance(sout, str))
+
 all_unicode_encodings = [
     "ascii",
     "base64_codec",
@@ -867,6 +883,7 @@ def test_main():
         CodecTest,
         CodecsModuleTest,
         StreamReaderTest,
+        Str2StrTest,
         BasicUnicodeTest,
         BasicStrTest
     )
```