diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2007-08-04 04:10:18 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2007-08-04 04:10:18 (GMT) |
commit | f3e93a02688849da484a6677320a84c913461fd4 (patch) | |
tree | 939499b8cef1710a91d1255b2333673aa04f9c3f /Lib/test | |
parent | 766d880a2fd0bcb4fca187db255763506e10f96b (diff) | |
download | cpython-f3e93a02688849da484a6677320a84c913461fd4.zip cpython-f3e93a02688849da484a6677320a84c913461fd4.tar.gz cpython-f3e93a02688849da484a6677320a84c913461fd4.tar.bz2 |
Fix a bug in the gb18030 codec: two-byte characters in the GB18030
extension were not mapped when encoding. (Bug reported by Bjorn Stabell.)
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_codecmaps_cn.py | 7 | ||||
-rw-r--r-- | Lib/test/test_multibytecodec_support.py | 16 |
2 files changed, 22 insertions, 1 deletion
diff --git a/Lib/test/test_codecmaps_cn.py b/Lib/test/test_codecmaps_cn.py index 75541ac..344fc56 100644 --- a/Lib/test/test_codecmaps_cn.py +++ b/Lib/test/test_codecmaps_cn.py @@ -19,6 +19,13 @@ class TestGBKMap(test_multibytecodec_support.TestBase_Mapping, mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/' \ 'MICSFT/WINDOWS/CP936.TXT' +class TestGB18030Map(test_multibytecodec_support.TestBase_Mapping, + unittest.TestCase): + encoding = 'gb18030' + mapfileurl = 'http://source.icu-project.org/repos/icu/data/' \ + 'trunk/charset/data/xml/gb-18030-2000.xml' + + def test_main(): test_support.run_unittest(__name__) diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index bec32de..197f777 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -5,7 +5,7 @@ # import sys, codecs, os.path -import unittest +import unittest, re from test import test_support from StringIO import StringIO @@ -272,6 +272,12 @@ class TestBase_Mapping(unittest.TestCase): return test_support.open_urlresource(self.mapfileurl) def test_mapping_file(self): + if self.mapfileurl.endswith('.xml'): + self._test_mapping_file_ucm() + else: + self._test_mapping_file_plain() + + def _test_mapping_file_plain(self): unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+')))) urt_wa = {} @@ -303,6 +309,14 @@ class TestBase_Mapping(unittest.TestCase): self._testpoint(csetch, unich) + def _test_mapping_file_ucm(self): + ucmdata = self.open_mapping_file().read() + uc = re.findall('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>', ucmdata) + for uni, coded in uc: + unich = unichr(int(uni, 16)) + codech = ''.join(chr(int(c, 16)) for c in coded.split()) + self._testpoint(codech, unich) + def test_mapping_supplemental(self): for mapping in self.supmaps: self._testpoint(*mapping) |