diff options
-rw-r--r-- | Doc/c-api/unicode.rst | 2 | ||||
-rw-r--r-- | Include/unicodeobject.h | 2 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 17 | ||||
-rw-r--r-- | Modules/_testcapimodule.c | 31 |
4 files changed, 50 insertions, 2 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 0835477..d1015dd 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -635,7 +635,7 @@ APIs: Copy the string *u* into a UCS4 buffer, including a null character, if *copy_null* is set. Returns *NULL* and sets an exception on error (in - particular, a :exc:`ValueError` if *buflen* is smaller than the length of + particular, a :exc:`SystemError` if *buflen* is smaller than the length of *u*). *buffer* is returned on success. .. versionadded:: 3.3 diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 20331a3..643d10d 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -733,7 +733,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( #endif /* Copy the string into a UCS4 buffer including the null character if copy_null - is set. Return NULL and raise an exception on error. Raise a ValueError if + is set. Return NULL and raise an exception on error. Raise a SystemError if the buffer is smaller than the string. Return buffer on success. buflen is the length of the buffer in (Py_UCS4) characters. 
*/ diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9cb69d4..c98cc14 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2711,6 +2711,23 @@ class CAPITest(unittest.TestCase): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') + # Test PyUnicode_AsUCS4() + @support.cpython_only + def test_asucs4(self): + from _testcapi import unicode_asucs4 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + l = len(s) + self.assertEqual(unicode_asucs4(s, l, 1), s+'\0') + self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff') + self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1) + self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0) + s = '\0'.join([s, s]) + self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') + self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + @support.cpython_only def test_encode_decimal(self): from _testcapi import unicode_encodedecimal diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index b5b8f1a..a6cd386 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) } static PyObject * +unicode_asucs4(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_UCS4 *buffer; + int copy_null; + Py_ssize_t str_len, buf_len; + + if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) { + return NULL; + } + + buf_len = str_len + 1; + buffer = PyMem_NEW(Py_UCS4, buf_len); + if (buffer == NULL) { + return PyErr_NoMemory(); + } + memset(buffer, 0, sizeof(Py_UCS4)*buf_len); + buffer[str_len] = 0xffffU; + + if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { + PyMem_FREE(buffer); + return NULL; + } + + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, 
buf_len); + PyMem_FREE(buffer); + return result; +} + +static PyObject * unicode_encodedecimal(PyObject *self, PyObject *args) { Py_UNICODE *unicode; @@ -4030,6 +4060,7 @@ static PyMethodDef TestMethods[] = { {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, + {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, |