summary | refs | log | tree | commit | diff | stats
path: root/Modules/_testcapi
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_testcapi')
-rw-r--r-- Modules/_testcapi/unicode.c 152
1 files changed, 152 insertions, 0 deletions
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index 79f99c4..da658b4 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -375,6 +375,119 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
static PyObject *
+test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    /* Exercise PyUnicodeWriter_DecodeUTF8Stateful() with the "ignore" and
+     * "replace" error handlers and a NULL "consumed" argument.
+     * Returns None on success, or NULL if a writer call reported failure. */
+    PyUnicodeWriter *w = PyUnicodeWriter_Create(0);
+    if (w == NULL) {
+        return NULL;
+    }
+
+    // The pieces are appended in order, separated by '-'. The || chain stops
+    // at the first call that reports failure.
+    if (PyUnicodeWriter_DecodeUTF8Stateful(w, "ign\xFFore", -1, "ignore", NULL) < 0
+        || PyUnicodeWriter_WriteChar(w, '-') < 0
+        || PyUnicodeWriter_DecodeUTF8Stateful(w, "replace\xFF", -1, "replace", NULL) < 0
+        || PyUnicodeWriter_WriteChar(w, '-') < 0
+        // incomplete trailing UTF-8 sequence
+        || PyUnicodeWriter_DecodeUTF8Stateful(w, "incomplete\xC3", -1, "replace", NULL) < 0)
+    {
+        PyUnicodeWriter_Discard(w);
+        return NULL;
+    }
+
+    // The writer is not discarded when Finish() fails; just propagate NULL.
+    PyObject *decoded = PyUnicodeWriter_Finish(w);
+    if (decoded == NULL) {
+        return NULL;
+    }
+    // "\xef\xbf\xbd" is U+FFFD encoded as UTF-8: it is expected where the
+    // "replace" handler met the invalid 0xFF and the truncated 0xC3.
+    assert(PyUnicode_EqualToUTF8(decoded,
+                                 "ignore-replace\xef\xbf\xbd"
+                                 "-incomplete\xef\xbf\xbd"));
+    Py_DECREF(decoded);
+
+    Py_RETURN_NONE;
+}
+
+
+static PyObject *
+test_unicodewriter_decode_utf8_consumed(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // Test PyUnicodeWriter_DecodeUTF8Stateful() with a non-NULL "consumed"
+    // output argument. Before each call "consumed" is reset to a sentinel
+    // (12345) so the assertions show that the call itself stored the value.
+    // Returns None on success, or NULL if a writer call reported failure.
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    Py_ssize_t consumed;
+
+    // valid string
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "text", -1, NULL, &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 4);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // non-ASCII: explicit length of 6 bytes (2-byte + '-' + 3-byte sequence)
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "\xC3\xA9-\xE2\x82\xAC", 6, NULL, &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 6);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // consumed is 0 if write fails
+    consumed = 12345;
+    // The call is made outside assert() so that it still runs, and the
+    // exception it sets is still cleared, if assertions are compiled out.
+    int res = PyUnicodeWriter_DecodeUTF8Stateful(writer, "invalid\xFF", -1, NULL, &consumed);
+    assert(res < 0);
+    (void)res;  // only read by assert()
+    PyErr_Clear();
+    assert(consumed == 0);
+
+    // ignore error handler: the dropped 0xFF byte still counts as consumed
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "more\xFF", -1, "ignore", &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 5);
+    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
+        goto error;
+    }
+
+    // incomplete trailing UTF-8 sequence: the dangling 0xC3 byte is not
+    // consumed (10 of the 11 input bytes)
+    consumed = 12345;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "ignore", &consumed) < 0) {
+        goto error;
+    }
+    assert(consumed == 10);
+
+    // The writer is not discarded when Finish() fails; just propagate NULL.
+    PyObject *result = PyUnicodeWriter_Finish(writer);
+    if (result == NULL) {
+        return NULL;
+    }
+    // Nothing from the failed "invalid\xFF" call is expected in the result.
+    assert(PyUnicode_EqualToUTF8(result,
+                                 "text-\xC3\xA9-\xE2\x82\xAC-"
+                                 "more-incomplete"));
+    Py_DECREF(result);
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
@@ -436,6 +549,42 @@ test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args
}
+static PyObject *
+test_unicodewriter_widechar(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    /* Exercise PyUnicodeWriter_WriteWideChar() with explicit lengths and
+     * with a length of -1, followed by one PyUnicodeWriter_WriteChar().
+     * Returns None on success, or NULL if a writer call reported failure. */
+    PyUnicodeWriter *w = PyUnicodeWriter_Create(0);
+    if (w == NULL) {
+        return NULL;
+    }
+
+    // Calls run in order; || stops at the first one that reports failure.
+    int failed =
+        // length 8 covers "latin1=" plus U+00E9; " IGNORED" must not appear
+        PyUnicodeWriter_WriteWideChar(w, L"latin1=\xE9 IGNORED", 8) < 0
+        || PyUnicodeWriter_WriteWideChar(w, L"-", 1) < 0
+        // length -1: the whole literal is expected in the result
+        || PyUnicodeWriter_WriteWideChar(w, L"euro=\u20AC", -1) < 0
+        || PyUnicodeWriter_WriteChar(w, '.') < 0;
+    if (failed) {
+        PyUnicodeWriter_Discard(w);
+        return NULL;
+    }
+
+    // The writer is not discarded when Finish() fails; just propagate NULL.
+    PyObject *text = PyUnicodeWriter_Finish(w);
+    if (text == NULL) {
+        return NULL;
+    }
+    // Expected value is spelled as UTF-8 bytes: U+00E9 -> C3 A9,
+    // U+20AC -> E2 82 AC.
+    assert(PyUnicode_EqualToUTF8(text,
+                                 "latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
+    Py_DECREF(text);
+
+    Py_RETURN_NONE;
+}
+
+
static PyMethodDef TestMethods[] = {
{"unicode_new", unicode_new, METH_VARARGS},
{"unicode_fill", unicode_fill, METH_VARARGS},
@@ -448,8 +597,11 @@ static PyMethodDef TestMethods[] = {
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
+ {"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
+ {"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS},
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
+ {"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
{NULL},
};