diff options
author | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2004-09-07 20:24:22 (GMT) |
commit | 69652035bc2cf22b0326bb00824f4b7e2674cc8b (patch) | |
tree | 088104a47f9c9cfc466a3e1c5f4d2560b2d41450 /Lib/test/test_codecs.py | |
parent | a708d6e3b0aa2d225d4e5ab338862f67994e1c45 (diff) | |
download | cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.zip cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.gz cpython-69652035bc2cf22b0326bb00824f4b7e2674cc8b.tar.bz2 |
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument chars which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument keepends. Trailing "\n"s will be stripped from the lines
if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 118 |
1 files changed, 117 insertions, 1 deletions
```diff
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index c428c61..524c247 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3,7 +3,45 @@ import unittest
 import codecs
 import StringIO
 
-class UTF16Test(unittest.TestCase):
+class Queue(object):
+    """
+    queue: write bytes at one end, read bytes from the other end
+    """
+    def __init__(self):
+        self._buffer = ""
+
+    def write(self, chars):
+        self._buffer += chars
+
+    def read(self, size=-1):
+        if size<0:
+            s = self._buffer
+            self._buffer = ""
+            return s
+        else:
+            s = self._buffer[:size]
+            self._buffer = self._buffer[size:]
+            return s
+
+class PartialReadTest(unittest.TestCase):
+    def check_partial(self, encoding, input, partialresults):
+        # get a StreamReader for the encoding and feed the bytestring version
+        # of input to the reader byte by byte. Read every available from
+        # the StreamReader and check that the results equal the appropriate
+        # entries from partialresults.
+        q = Queue()
+        r = codecs.getreader(encoding)(q)
+        result = u""
+        for (c, partialresult) in zip(input.encode(encoding), partialresults):
+            q.write(c)
+            result += r.read()
+            self.assertEqual(result, partialresult)
+        # check that there's nothing left in the buffers
+        self.assertEqual(r.read(), u"")
+        self.assertEqual(r.bytebuffer, "")
+        self.assertEqual(r.charbuffer, u"")
+
+class UTF16Test(PartialReadTest):
 
     spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
     spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
@@ -23,6 +61,81 @@ class UTF16Test(unittest.TestCase):
         f = reader(s)
         self.assertEquals(f.read(), u"spamspam")
 
+    def test_partial(self):
+        self.check_partial(
+            "utf-16",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"", # first byte of BOM read
+                u"", # second byte of BOM read => byteorder known
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF16LETest(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-16-le",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF16BETest(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-16-be",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF8Test(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-8",
+            u"\x00\xff\u07ff\u0800\uffff",
+            [
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800\uffff",
+            ]
+        )
+
 class EscapeDecodeTest(unittest.TestCase):
     def test_empty_escape_decode(self):
         self.assertEquals(codecs.escape_decode(""), ("", 0))
@@ -348,6 +461,9 @@ class CodecsModuleTest(unittest.TestCase):
 def test_main():
     test_support.run_unittest(
         UTF16Test,
+        UTF16LETest,
+        UTF16BETest,
+        UTF8Test,
         EscapeDecodeTest,
         RecodingTest,
         PunycodeTest,
```