summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_codecs.py
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2004-09-07 20:24:22 (GMT)
commit: 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch)
tree: 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Lib/test/test_codecs.py
parent: a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff)
download: cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz
cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted). codecs.StreamReader now implements buffering, which enables proper readline support for the UTF-16 decoders. codecs.StreamReader.read() has a new argument chars which specifies the number of characters to return. codecs.StreamReader.readline() and codecs.StreamReader.readlines() have a new argument keepends. Trailing "\n"s will be stripped from the lines if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- Lib/test/test_codecs.py 118
1 files changed, 117 insertions, 1 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index c428c61..524c247 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3,7 +3,45 @@ import unittest
import codecs
import StringIO
-class UTF16Test(unittest.TestCase):
+class Queue(object):
+ """
+ queue: write bytes at one end, read bytes from the other end
+ """
+ def __init__(self):
+ self._buffer = ""
+
+ def write(self, chars):
+ self._buffer += chars
+
+ def read(self, size=-1):
+ if size<0:
+ s = self._buffer
+ self._buffer = ""
+ return s
+ else:
+ s = self._buffer[:size]
+ self._buffer = self._buffer[size:]
+ return s
+
+class PartialReadTest(unittest.TestCase):
+ def check_partial(self, encoding, input, partialresults):
+ # get a StreamReader for the encoding and feed the bytestring version
+ # of input to the reader byte by byte. Read everything available from
+ # the StreamReader and check that the results equal the appropriate
+ # entries from partialresults.
+ q = Queue()
+ r = codecs.getreader(encoding)(q)
+ result = u""
+ for (c, partialresult) in zip(input.encode(encoding), partialresults):
+ q.write(c)
+ result += r.read()
+ self.assertEqual(result, partialresult)
+ # check that there's nothing left in the buffers
+ self.assertEqual(r.read(), u"")
+ self.assertEqual(r.bytebuffer, "")
+ self.assertEqual(r.charbuffer, u"")
+
+class UTF16Test(PartialReadTest):
spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
@@ -23,6 +61,81 @@ class UTF16Test(unittest.TestCase):
f = reader(s)
self.assertEquals(f.read(), u"spamspam")
+ def test_partial(self):
+ self.check_partial(
+ "utf-16",
+ u"\x00\xff\u0100\uffff",
+ [
+ u"", # first byte of BOM read
+ u"", # second byte of BOM read => byteorder known
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+class UTF16LETest(PartialReadTest):
+
+ def test_partial(self):
+ self.check_partial(
+ "utf-16-le",
+ u"\x00\xff\u0100\uffff",
+ [
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+class UTF16BETest(PartialReadTest):
+
+ def test_partial(self):
+ self.check_partial(
+ "utf-16-be",
+ u"\x00\xff\u0100\uffff",
+ [
+ u"",
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100",
+ u"\x00\xff\u0100\uffff",
+ ]
+ )
+
+class UTF8Test(PartialReadTest):
+
+ def test_partial(self):
+ self.check_partial(
+ "utf-8",
+ u"\x00\xff\u07ff\u0800\uffff",
+ [
+ u"\x00",
+ u"\x00",
+ u"\x00\xff",
+ u"\x00\xff",
+ u"\x00\xff\u07ff",
+ u"\x00\xff\u07ff",
+ u"\x00\xff\u07ff",
+ u"\x00\xff\u07ff\u0800",
+ u"\x00\xff\u07ff\u0800",
+ u"\x00\xff\u07ff\u0800",
+ u"\x00\xff\u07ff\u0800\uffff",
+ ]
+ )
+
class EscapeDecodeTest(unittest.TestCase):
def test_empty_escape_decode(self):
self.assertEquals(codecs.escape_decode(""), ("", 0))
@@ -348,6 +461,9 @@ class CodecsModuleTest(unittest.TestCase):
def test_main():
test_support.run_unittest(
UTF16Test,
+ UTF16LETest,
+ UTF16BETest,
+ UTF8Test,
EscapeDecodeTest,
RecodingTest,
PunycodeTest,