diff options
Diffstat (limited to 'Modules/_testcapi')
-rw-r--r-- | Modules/_testcapi/unicode.c | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 79f99c4..da658b4 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -375,6 +375,133 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
 
 
 static PyObject *
+test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // Test PyUnicodeWriter_DecodeUTF8Stateful() with consumed=NULL: the
+    // invalid byte 0xFF is dropped by the "ignore" error handler and decoded
+    // as U+FFFD ("\xef\xbf\xbd" in UTF-8) by the "replace" error handler.
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "ign\xFFore", -1, "ignore", NULL) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "replace\xFF", -1, "replace", NULL) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // incomplete trailing UTF-8 sequence: with consumed=NULL, the lone lead
+    // byte 0xC3 is replaced with U+FFFD as well (see the expected string)
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "replace", NULL) < 0) {
+        goto error;
+    }
+
+    // If Finish() fails, return directly instead of jumping to the error
+    // label: Discard() is not called on the writer after Finish().
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result,
+                                 "ignore-replace\xef\xbf\xbd"
+                                 "-incomplete\xef\xbf\xbd"));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
+test_unicodewriter_decode_utf8_consumed(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // Test the 'consumed' output argument of
+    // PyUnicodeWriter_DecodeUTF8Stateful(). It is set to a sentinel value
+    // (12345) before each call to check that the call really updates it.
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    Py_ssize_t consumed;
+    int res;
+
+    // valid string
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "text", -1, NULL, &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 4);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // non-ASCII: 2-byte and 3-byte UTF-8 sequences, explicit size (6 bytes)
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "\xC3\xA9-\xE2\x82\xAC", 6, NULL, &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 6);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // consumed is 0 if write fails (errors=NULL, invalid byte 0xFF).
+    // The call is kept outside assert(): the argument of assert() is not
+    // evaluated when NDEBUG is defined, which would skip the call.
+    consumed = 12345;
+    res = PyUnicodeWriter_DecodeUTF8Stateful(writer, "invalid\xFF", -1, NULL, &consumed);
+    assert(res < 0);
+    (void)res;  // res is only read by assert()
+    PyErr_Clear();
+    assert(consumed == 0);
+
+    // ignore error handler: the ignored byte 0xFF counts as consumed
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "more\xFF", -1, "ignore", &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 5);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // incomplete trailing UTF-8 sequence: the lone lead byte 0xC3 is not
+    // consumed, so only 10 of the 11 input bytes are reported
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "ignore", &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 10);
+
+    // The expected string has no trace of the failed "invalid\xFF" write
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_EqualToUTF8(result,
+                                 "text-\xC3\xA9-\xE2\x82\xAC-"
+                                 "more-incomplete"));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
 test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
 {
     PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
@@ -436,6 +563,46 @@ test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args
 }
 
 
+static PyObject *
+test_unicodewriter_widechar(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // Test PyUnicodeWriter_WriteWideChar() with an explicit size, which
+    // truncates the input, and with size -1 which writes the whole string.
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    // size=8: only L"latin1=\xE9" is written, L" IGNORED" is left out
+    if (PyUnicodeWriter_WriteWideChar(writer, L"latin1=\xE9 IGNORED", 8) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteWideChar(writer, L"-", 1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
+        goto error;
+    }
+
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    // U+00E9 and U+20AC are compared in their UTF-8 encoded form
+    assert(PyUnicode_EqualToUTF8(result,
+                                 "latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"unicode_new", unicode_new, METH_VARARGS},
     {"unicode_fill", unicode_fill, METH_VARARGS},
@@ -448,8 +615,11 @@ static PyMethodDef TestMethods[] = {
     {"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
     {"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
     {"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
+    {"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
+    {"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS},
     {"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
     {"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
+    {"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
     {NULL},
 };
 