From 5623ac87bbe5de481957eca5eeae06347612fbeb Mon Sep 17 00:00:00 2001 From: Hai Shi Date: Sat, 20 Jul 2019 02:56:23 -0500 Subject: bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531) --- Lib/test/test_unicode.py | 28 ++++++++++++++++++++++++++++ Modules/_testcapimodule.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 177d80d..8be16c8 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2819,6 +2819,34 @@ class CAPITest(unittest.TestCase): self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + # Test PyUnicode_AsUTF8() + @support.cpython_only + def test_asutf8(self): + from _testcapi import unicode_asutf8 + + bmp = '\u0100' + bmp2 = '\uffff' + nonbmp = chr(0x10ffff) + + self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80') + self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf') + self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') + self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') + + # Test PyUnicode_AsUTF8AndSize() + @support.cpython_only + def test_asutf8andsize(self): + from _testcapi import unicode_asutf8andsize + + bmp = '\u0100' + bmp2 = '\uffff' + nonbmp = chr(0x10ffff) + + self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2)) + self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3)) + self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) + self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') + # Test PyUnicode_FindChar() @support.cpython_only def test_findchar(self): diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 8f34e93..8a6e741 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1922,6 +1922,48 @@ unicode_asucs4(PyObject *self, PyObject *args) } static PyObject * +unicode_asutf8(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *buffer; + + if (!PyArg_ParseTuple(args, "U", &unicode)) { + return NULL; + } + + buffer = PyUnicode_AsUTF8(unicode); + if (buffer == NULL) { + return NULL; + } + + return PyBytes_FromString(buffer); +} + +static PyObject * +unicode_asutf8andsize(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + const char *buffer; + Py_ssize_t utf8_len; + + if(!PyArg_ParseTuple(args, "U", &unicode)) { + return NULL; + } + + buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len); + if (buffer == NULL) { + return NULL; + } + + result = PyBytes_FromString(buffer); + if (result == NULL) { + return NULL; + } + + return Py_BuildValue("(Nn)", result, utf8_len); +} + +static PyObject * unicode_findchar(PyObject *self, PyObject *args) { PyObject *str; @@ -5174,6 +5216,8 @@ static PyMethodDef TestMethods[] = { {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, + {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, + {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, -- cgit v0.12