diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-08 21:14:24 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-01-08 21:14:24 (GMT) |
commit | 48e188e57313813bd048e25b8fa6123b8cd5c9a0 (patch) | |
tree | 3d5b4dacb7995a82e1057a7fa4dc1ed7d8dc8c5c /Lib | |
parent | dec798eb46f7edfe0995ce1b8966097fb7567eb7 (diff) | |
download | cpython-48e188e57313813bd048e25b8fa6123b8cd5c9a0.zip cpython-48e188e57313813bd048e25b8fa6123b8cd5c9a0.tar.gz cpython-48e188e57313813bd048e25b8fa6123b8cd5c9a0.tar.bz2 |
Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
Amaury Forgeot d'Arc. Added tests for partial decoding of non-BMP
characters.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_codecs.py | 48 |
1 files changed, 40 insertions, 8 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0f7c23e..4c58b2d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -313,7 +313,7 @@ class UTF32Test(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read @@ -335,6 +335,10 @@ class UTF32Test(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -369,7 +373,7 @@ class UTF32LETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -387,6 +391,10 @@ class UTF32LETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -409,7 +417,7 @@ class UTF32BETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -427,6 +435,10 @@ class UTF32BETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -477,7 +489,7 @@ class UTF16Test(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read => byteorder known @@ -489,6 +501,10 @@ class UTF16Test(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -526,7 +542,7 @@ class UTF16LETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -536,6 +552,10 @@ class UTF16LETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -565,7 +585,7 @@ class UTF16BETest(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -575,6 +595,10 @@ class UTF16BETest(ReadTest): "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -604,7 +628,7 @@ class UTF8Test(ReadTest): def test_partial(self): self.check_partial( - "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", [ "\x00", "\x00", @@ -617,6 +641,10 @@ class UTF8Test(ReadTest): "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", ] ) @@ -694,7 +722,7 @@ class UTF8SigTest(ReadTest): def test_partial(self): self.check_partial( - "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", [ "", "", @@ -713,6 +741,10 @@ class UTF8SigTest(ReadTest): "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", ] ) |