diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2007-07-28 07:03:05 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2007-07-28 07:03:05 (GMT) |
commit | f1e0b3f6307084dc3429bd5a1361a5be7be708bb (patch) | |
tree | 2bae20bba6c1842c7880bfd48a5ba8d2199e1aca | |
parent | f25e35b9ec2bb87833108c5bb615113a93894dce (diff) | |
download | cpython-f1e0b3f6307084dc3429bd5a1361a5be7be708bb.zip cpython-f1e0b3f6307084dc3429bd5a1361a5be7be708bb.tar.gz cpython-f1e0b3f6307084dc3429bd5a1361a5be7be708bb.tar.bz2 |
Bug #1704793: Return UTF-16 pair if unicodedata.lookup cannot
represent the result in a single character.
-rw-r--r-- | Lib/test/test_unicodedata.py | 3 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Modules/unicodedata.c | 27 |
3 files changed, 17 insertions, 16 deletions
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 0023bf4..574178d 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -214,6 +214,9 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
                 count += 1
         self.assert_(count >= 10) # should have tested at least the ASCII digits
 
+    def test_bug_1704793(self):
+        self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), u'\U00010346')
+
 def test_main():
     test.test_support.run_unittest(
         UnicodeMiscTest,
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -238,6 +238,9 @@ Core and builtins
 Library
 -------
 
+- Bug #1704793: Return UTF-16 pair if unicodedata.lookup cannot
+  represent the result in a single character.
+
 - Bug #978833: Close https sockets by releasing the _ssl object.
 
 - Change location of the package index to pypi.python.org/pypi
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index fac9adc..a075693 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1077,8 +1077,7 @@ static PyObject *
 unicodedata_lookup(PyObject* self, PyObject* args)
 {
     Py_UCS4 code;
-    Py_UNICODE str[1];
-    char errbuf[256];
+    Py_UNICODE str[2];
 
     char* name;
     int namelen;
@@ -1086,24 +1085,20 @@ unicodedata_lookup(PyObject* self, PyObject* args)
         return NULL;
 
     if (!_getcode(self, name, namelen, &code)) {
-        /* XXX(nnorwitz): why are we allocating for the error msg?
-           Why not always use snprintf? */
-        char fmt[] = "undefined character name '%s'";
-        char *buf = PyMem_MALLOC(sizeof(fmt) + namelen);
-        if (buf)
-            sprintf(buf, fmt, name);
-        else {
-            buf = errbuf;
-            PyOS_snprintf(buf, sizeof(errbuf), fmt, name);
-        }
-        PyErr_SetString(PyExc_KeyError, buf);
-        if (buf != errbuf)
-            PyMem_FREE(buf);
+        PyErr_Format(PyExc_KeyError, "undefined character name '%s'",
+                     name);
         return NULL;
     }
 
+#ifndef Py_UNICODE_WIDE
+    if (code >= 0x10000) {
+        str[0] = 0xd800 + ((code - 0x10000) >> 10);
+        str[1] = 0xdc00 + ((code - 0x10000) & 0x3ff);
+        return PyUnicode_FromUnicode(str, 2);
+    }
+#endif
     str[0] = (Py_UNICODE) code;
-    return PyUnicode_FromUnicode(str, 1);
+    return PyUnicode_FromUnicode(str, 1);
 }
 
 /* XXX Add doc strings. */