summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Hye-Shik Chang <hyeshik@gmail.com> 2006-10-08 13:48:34 (GMT)
committer: Hye-Shik Chang <hyeshik@gmail.com> 2006-10-08 13:48:34 (GMT)
commit: b788346573522a96d8e64d95307b36ad2fc887f9 (patch)
tree: 1878bc8672b857814fb7ad0ea9568b3e33de504d
parent: 846f73a530f6b426362e2d80a8cbd4fd16a3ee9f (diff)
download: cpython-b788346573522a96d8e64d95307b36ad2fc887f9.zip
cpython-b788346573522a96d8e64d95307b36ad2fc887f9.tar.gz
cpython-b788346573522a96d8e64d95307b36ad2fc887f9.tar.bz2
Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault
when encoding non-BMP unicode characters. (Submitted by Ray Chason)
-rw-r--r-- Lib/test/test_multibytecodec.py 10
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 36
3 files changed, 35 insertions, 14 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index 800456e..a8666d3 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -208,6 +208,16 @@ class Test_ISO2022(unittest.TestCase):
e = u'\u3406'.encode(encoding)
self.failIf(filter(lambda x: x >= '\x80', e))
+ def test_bug1572832(self):
+ if sys.maxunicode >= 0x10000:
+ myunichr = unichr
+ else:
+ myunichr = lambda x: unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF))
+
+ for x in xrange(0x10000, 0x110000):
+ # Any ISO 2022 codec will cause the segfault
+ myunichr(x).encode('iso_2022_jp', 'ignore')
+
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test_MultibyteCodec))
diff --git a/Misc/NEWS b/Misc/NEWS
index 787e332..d729648 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -123,6 +123,9 @@ Library
Extension Modules
-----------------
+- Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault
+ when encoding non-BMP unicode characters.
+
- Bug #1556784: allow format strings longer than 127 characters in
datetime's strftime function.
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 2a11e9a..55196a9 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -592,9 +592,11 @@ ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
- TRYMAP_ENC(cp949, coded, *data)
- if (!(coded & 0x8000))
- return coded;
+ if (*data < 0x10000) {
+ TRYMAP_ENC(cp949, coded, *data)
+ if (!(coded & 0x8000))
+ return coded;
+ }
return MAP_UNMAPPABLE;
}
@@ -628,11 +630,13 @@ jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
- if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
- return 0x2140;
- else TRYMAP_ENC(jisxcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
+ if (*data < 0x10000) {
+ if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
+ return 0x2140;
+ else TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
}
return MAP_UNMAPPABLE;
}
@@ -665,9 +669,11 @@ jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
- TRYMAP_ENC(jisxcommon, coded, *data) {
- if (coded & 0x8000)
- return coded & 0x7fff;
+ if (*data < 0x10000) {
+ TRYMAP_ENC(jisxcommon, coded, *data) {
+ if (coded & 0x8000)
+ return coded & 0x7fff;
+ }
}
return MAP_UNMAPPABLE;
}
@@ -970,9 +976,11 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
{
DBCHAR coded;
assert(*length == 1);
- TRYMAP_ENC(gbcommon, coded, *data) {
- if (!(coded & 0x8000))
- return coded;
+ if (*data < 0x10000) {
+ TRYMAP_ENC(gbcommon, coded, *data) {
+ if (!(coded & 0x8000))
+ return coded;
+ }
}
return MAP_UNMAPPABLE;
}