diff options
author | Inada Naoki <songofacandy@gmail.com> | 2020-03-14 03:43:18 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-14 03:43:18 (GMT) |
commit | c7ad974d341d3edb6b9d2a2dcae4d3d4794ada6b (patch) | |
tree | 2026fd46b762fb2deaf9091e4d7e09dc198bc2d3 /Modules/_testcapimodule.c | |
parent | 8fb02b6e1942811c8d81041e7df3f5f1f4b1d410 (diff) | |
download | cpython-c7ad974d341d3edb6b9d2a2dcae4d3d4794ada6b.zip cpython-c7ad974d341d3edb6b9d2a2dcae4d3d4794ada6b.tar.gz cpython-c7ad974d341d3edb6b9d2a2dcae4d3d4794ada6b.tar.bz2 |
bpo-39087: Add _PyUnicode_GetUTF8Buffer() (GH-17659)
Co-authored-by: Victor Stinner <vstinner@python.org>
Diffstat (limited to 'Modules/_testcapimodule.c')
-rw-r--r-- | Modules/_testcapimodule.c | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 3cc5586..09b7706 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1968,6 +1968,243 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
 }
 
+/* Test wrapper for _PyUnicode_GetUTF8Buffer().
+
+   Parses (unicode[, errors]) from args, requests the UTF-8 buffer of
+   `unicode` from _PyUnicode_GetUTF8Buffer(), and returns a copy of the
+   buffer contents as a new bytes object.  Returns NULL on failure. */
+static PyObject *
+unicode_getutf8buffer(PyObject *self, PyObject *args)
+{
+    PyObject *unicode;
+    const char *errors = NULL;
+    if (!PyArg_ParseTuple(args, "O|s", &unicode, &errors)) {
+        return NULL;
+    }
+
+    Py_buffer buffer;
+    if (_PyUnicode_GetUTF8Buffer(unicode, errors, &buffer) < 0) {
+        return NULL;
+    }
+
+    // buffer.obj must be either the string itself or an exact bytes object.
+    assert(buffer.obj != NULL);
+    assert(buffer.obj == unicode || PyBytes_CheckExact(buffer.obj));
+
+    // Copy the data out before the buffer is released.
+    PyObject *result = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
+    PyBuffer_Release(&buffer);
+    return result;
+}
+
+/* Self-test for _PyUnicode_GetUTF8Buffer().
+
+   Exercises three cases: an ASCII string, a non-ASCII string, and the same
+   non-ASCII string after PyUnicode_AsUTF8() has been called on it.
+   Returns None when every check passes; otherwise returns NULL with an
+   exception set (TestError when a check fails). */
+static PyObject *
+unicode_test_getutf8buffer(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    Py_buffer buf;
+
+    // Test 1: ASCII string
+    PyObject *str = PyUnicode_FromString("hello");
+    if (str == NULL) {
+        return NULL;
+    }
+    Py_ssize_t refcnt = Py_REFCNT(str);
+
+    // _PyUnicode_GetUTF8Buffer() must not fail for ASCII string.
+    // Check the return value at run time (not only with assert()) so that
+    // buf is never read uninitialized when assertions are compiled out.
+    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
+        Py_DECREF(str);
+        if (!PyErr_Occurred()) {
+            PyErr_Format(TestError,
+                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
+                         "without exception set. (%s:%d)",
+                         __FILE__, __LINE__);
+        }
+        return NULL;
+    }
+
+    if (buf.obj != str) {
+        PyErr_Format(TestError,
+                     "buf.obj must be equal to str. (%s:%d)",
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (buf.len != PyUnicode_GET_LENGTH(str)) {
+        PyErr_Format(TestError,
+                     "buf.len must be equal to len(str). (%s:%d)",
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+    assert(((const char*)buf.buf)[5] == '\0');
+
+    if ((Py_UCS1*)buf.buf != PyUnicode_1BYTE_DATA(str)) {
+        PyErr_Format(TestError,
+                     "buf.buf must be equal to PyUnicode_1BYTE_DATA(str). (%s:%d)",
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (refcnt + 1 != Py_REFCNT(str)) {
+        PyErr_Format(TestError,
+                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
+                     refcnt + 1, Py_REFCNT(str),
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    PyBuffer_Release(&buf);
+
+    if (refcnt != Py_REFCNT(str)) {
+        PyErr_Format(TestError,
+                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
+                     refcnt, Py_REFCNT(str),
+                     __FILE__, __LINE__);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    Py_DECREF(str);
+
+    // Test 2: non-ASCII string
+
+    // "hello" in Japanese. len(str)==5, len(str.encode()) == 15.
+    str = PyUnicode_FromString("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf");
+    if (str == NULL) {
+        return NULL;
+    }
+    refcnt = Py_REFCNT(str);
+    assert(PyUnicode_GET_LENGTH(str) == 5);
+
+    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
+        Py_DECREF(str);
+        if (!PyErr_Occurred()) {
+            PyErr_Format(TestError,
+                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
+                         "without exception set. (%s:%d)",
+                         __FILE__, __LINE__);
+        }
+        return NULL;
+    }
+
+    if (!PyBytes_CheckExact(buf.obj)) {
+        PyErr_Format(TestError,
+                     "buf.obj must be a bytes object, got %R (%s:%d)",
+                     buf.obj, __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (buf.len != 15) {
+        PyErr_Format(TestError,
+                     "Expected buf.len == 15, actual %zd (%s:%d)",
+                     buf.len, __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+    assert(((const char*)buf.buf)[15] == '\0');
+
+    if (refcnt != Py_REFCNT(str)) {
+        PyErr_Format(TestError,
+                     "Py_REFCNT(str) must not be changed. (%s:%d)",
+                     __FILE__, __LINE__);
+        // buf.obj is a bytes object here (checked above), so releasing
+        // the buffer does not touch str.
+        PyBuffer_Release(&buf);
+        // Do not DECREF here because refcnt is broken.
+        return NULL;
+    }
+
+    PyBuffer_Release(&buf);
+
+    // Test 3: There is a UTF-8 cache
+    // Reuse str of the previous test.
+
+    const char *cache = PyUnicode_AsUTF8(str);
+    if (cache == NULL) {
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
+        Py_DECREF(str);
+        if (!PyErr_Occurred()) {
+            PyErr_Format(TestError,
+                         "_PyUnicode_GetUTF8Buffer() returned nonzero "
+                         "without exception set. (%s:%d)",
+                         __FILE__, __LINE__);
+        }
+        return NULL;
+    }
+
+    if (buf.obj != str) {
+        PyErr_Format(TestError,
+                     "buf.obj must be equal to str. (%s:%d)",
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (buf.buf != cache) {
+        PyErr_Format(TestError,
+                     "buf.buf must be equal to the UTF-8 cache (%s:%d)",
+                     __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+
+    if (buf.len != 15) {
+        PyErr_Format(TestError,
+                     "Expected buf.len == 15, actual %zd (%s:%d)",
+                     buf.len, __FILE__, __LINE__);
+        PyBuffer_Release(&buf);
+        Py_DECREF(str);
+        return NULL;
+    }
+    assert(((const char*)buf.buf)[15] == '\0');
+
+    if (refcnt + 1 != Py_REFCNT(str)) {
+        PyErr_Format(TestError,
+                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
+                     refcnt + 1, Py_REFCNT(str),
+                     __FILE__, __LINE__);
+        // Do not DECREF here because refcnt is broken.
+        return NULL;
+    }
+
+    PyBuffer_Release(&buf);
+
+    if (refcnt != Py_REFCNT(str)) {
+        PyErr_Format(TestError,
+                     "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
+                     refcnt, Py_REFCNT(str),
+                     __FILE__, __LINE__);
+        // Do not DECREF here because refcnt is broken.
+        return NULL;
+    }
+
+    Py_DECREF(str);
+    Py_RETURN_NONE;
+}
+
 static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
     PyObject *str;
@@ -5392,6 +5629,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
     {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
     {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
+    {"unicode_getutf8buffer", unicode_getutf8buffer, METH_VARARGS},
+    {"unicode_test_getutf8buffer", unicode_test_getutf8buffer, METH_NOARGS},
     {"unicode_findchar", unicode_findchar, METH_VARARGS},
     {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
     {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},