diff options
author | Hye-Shik Chang <hyeshik@gmail.com> | 2006-10-08 13:48:34 (GMT) |
---|---|---|
committer | Hye-Shik Chang <hyeshik@gmail.com> | 2006-10-08 13:48:34 (GMT) |
commit | b788346573522a96d8e64d95307b36ad2fc887f9 (patch) | |
tree | 1878bc8672b857814fb7ad0ea9568b3e33de504d | |
parent | 846f73a530f6b426362e2d80a8cbd4fd16a3ee9f (diff) | |
download | cpython-b788346573522a96d8e64d95307b36ad2fc887f9.zip cpython-b788346573522a96d8e64d95307b36ad2fc887f9.tar.gz cpython-b788346573522a96d8e64d95307b36ad2fc887f9.tar.bz2 |
Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault
when encoding non-BMP unicode characters. (Submitted by Ray Chason)
-rw-r--r-- | Lib/test/test_multibytecodec.py | 10 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 36 |
3 files changed, 35 insertions, 14 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 800456e..a8666d3 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -208,6 +208,16 @@ class Test_ISO2022(unittest.TestCase): e = u'\u3406'.encode(encoding) self.failIf(filter(lambda x: x >= '\x80', e)) + def test_bug1572832(self): + if sys.maxunicode >= 0x10000: + myunichr = unichr + else: + myunichr = lambda x: unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF)) + + for x in xrange(0x10000, 0x110000): + # Any ISO 2022 codec will cause the segfault + myunichr(x).encode('iso_2022_jp', 'ignore') + def test_main(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(Test_MultibyteCodec)) @@ -123,6 +123,9 @@ Library Extension Modules ----------------- +- Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault + when encoding non-BMP unicode characters. + - Bug #1556784: allow format strings longer than 127 characters in datetime's strftime function. diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 2a11e9a..55196a9 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -592,9 +592,11 @@ ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); - TRYMAP_ENC(cp949, coded, *data) - if (!(coded & 0x8000)) - return coded; + if (*data < 0x10000) { + TRYMAP_ENC(cp949, coded, *data) + if (!(coded & 0x8000)) + return coded; + } return MAP_UNMAPPABLE; } @@ -628,11 +630,13 @@ jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); - if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ - return 0x2140; - else TRYMAP_ENC(jisxcommon, coded, *data) { - if (!(coded & 0x8000)) - return coded; + if (*data < 0x10000) { + if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ + return 0x2140; + else TRYMAP_ENC(jisxcommon, coded, *data) { + if (!(coded & 0x8000)) + return coded; + } } return MAP_UNMAPPABLE; } @@ -665,9 +669,11 @@ jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); - TRYMAP_ENC(jisxcommon, coded, *data) { - if (coded & 0x8000) - return coded & 0x7fff; + if (*data < 0x10000) { + TRYMAP_ENC(jisxcommon, coded, *data) { + if (coded & 0x8000) + return coded & 0x7fff; + } } return MAP_UNMAPPABLE; } @@ -970,9 +976,11 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); - TRYMAP_ENC(gbcommon, coded, *data) { - if (!(coded & 0x8000)) - return coded; + if (*data < 0x10000) { + TRYMAP_ENC(gbcommon, coded, *data) { + if (!(coded & 0x8000)) + return coded; + } } return MAP_UNMAPPABLE; } |