summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2005-07-20 22:15:39 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2005-07-20 22:15:39 (GMT)
commit: c9878e1b220b748788c3faa656257d5da4cd46c7 (patch)
tree: 93c82f0d65fecef02e121768f00181b4800fd2f6 /Lib
parent: 21c825417fc993d708c3ff57e2b8b97b09a20159 (diff)
download: cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.zip
cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.tar.gz
cpython-c9878e1b220b748788c3faa656257d5da4cd46c7.tar.bz2
Make attributes and local variables in the StreamReader str objects instead
of unicode objects, so that codecs that do a str->str decoding won't promote the result to unicode. This fixes SF bug #1241507.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/codecs.py 12
-rw-r--r-- Lib/test/test_codecs.py 17
2 files changed, 24 insertions, 5 deletions
diff --git a/Lib/codecs.py b/Lib/codecs.py
index 08cf50b..0ffa382 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -229,7 +229,9 @@ class StreamReader(Codec):
self.stream = stream
self.errors = errors
self.bytebuffer = ""
- self.charbuffer = u""
+ # For str->str decoding this will stay a str
+ # For str->unicode decoding the first read will promote it to unicode
+ self.charbuffer = ""
def decode(self, input, errors='strict'):
raise NotImplementedError
@@ -284,7 +286,7 @@ class StreamReader(Codec):
if chars < 0:
# Return everything we've got
result = self.charbuffer
- self.charbuffer = u""
+ self.charbuffer = ""
else:
# Return the first chars characters
result = self.charbuffer[:chars]
@@ -301,7 +303,7 @@ class StreamReader(Codec):
"""
readsize = size or 72
- line = u""
+ line = ""
# If size is given, we call read() only once
while True:
data = self.read(readsize)
@@ -309,7 +311,7 @@ class StreamReader(Codec):
# If we're at a "\r" read one extra character (which might
# be a "\n") to get a proper line ending. If the stream is
# temporarily exhausted we return the wrong line ending.
- if data.endswith(u"\r"):
+ if data.endswith("\r"):
data += self.read(size=1, chars=1)
line += data
@@ -319,7 +321,7 @@ class StreamReader(Codec):
line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end
# Put the rest back together and keep it until the next call
- self.charbuffer = u"".join(lines[1:]) + self.charbuffer
+ self.charbuffer = "".join(lines[1:]) + self.charbuffer
if keepends:
line = line0withend
else:
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 4154ad2..5f799e0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -663,6 +663,22 @@ class StreamReaderTest(unittest.TestCase):
f = self.reader(self.stream)
self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
+class Str2StrTest(unittest.TestCase):
+
+ def test_read(self):
+ sin = "\x80".encode("base64_codec")
+ reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
+ sout = reader.read()
+ self.assertEqual(sout, "\x80")
+ self.assert_(isinstance(sout, str))
+
+ def test_readline(self):
+ sin = "\x80".encode("base64_codec")
+ reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
+ sout = reader.readline()
+ self.assertEqual(sout, "\x80")
+ self.assert_(isinstance(sout, str))
+
all_unicode_encodings = [
"ascii",
"base64_codec",
@@ -867,6 +883,7 @@ def test_main():
CodecTest,
CodecsModuleTest,
StreamReaderTest,
+ Str2StrTest,
BasicUnicodeTest,
BasicStrTest
)