summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Inada Naoki <songofacandy@gmail.com>, 2020-03-14 06:59:27 (GMT)
committer: GitHub <noreply@github.com>, 2020-03-14 06:59:27 (GMT)
commit: 3a8c56295d6272ad2177d2de8af4c3f824f3ef92 (patch)
tree: 9f4d7a615bd78987c6d36ee3ab7e084ee8ee1bc1
parent: 7a5cbc72988aee668816be9d1ef44c0cb4081ff6 (diff)
download: cpython-3a8c56295d6272ad2177d2de8af4c3f824f3ef92.zip
          cpython-3a8c56295d6272ad2177d2de8af4c3f824f3ef92.tar.gz
          cpython-3a8c56295d6272ad2177d2de8af4c3f824f3ef92.tar.bz2
Revert "bpo-39087: Add _PyUnicode_GetUTF8Buffer()" (GH-18985)
* Revert "bpo-39087: Add _PyUnicode_GetUTF8Buffer() (GH-17659)"

  This reverts commit c7ad974d341d3edb6b9d2a2dcae4d3d4794ada6b.

* Update unicodeobject.h
-rw-r--r-- Include/cpython/unicodeobject.h | 13
-rw-r--r-- Lib/test/test_unicode.py | 22
-rw-r--r-- Misc/NEWS.d/next/C API/2019-12-19-21-19-53.bpo-39087.l4A11-.rst | 2
-rw-r--r-- Modules/_testcapimodule.c | 212
-rw-r--r-- Objects/unicodeobject.c | 35
5 files changed, 0 insertions, 284 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index be91d2d..0df6479 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -734,19 +734,6 @@ PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
/* --- Manage the default encoding ---------------------------------------- */
-/* Get a buffer to the UTF-8 encoding of the Unicode object unicode.
- Returns -1 on error.
-
- Successful calls must be paired to
- calls to PyBuffer_Release.
-*/
-
-PyAPI_FUNC(int) _PyUnicode_GetUTF8Buffer(
- PyObject *unicode, /* Unicode object */
- const char *errors, /* error handling */
- Py_buffer *view /* (out) buffer to the UTF-8 encoding */
- );
-
/* Returns a pointer to the default encoding (UTF-8) of the
Unicode object unicode and the size of the encoded representation
in bytes stored in *size.
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 0522513..2839889 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2830,28 +2830,6 @@ class CAPITest(unittest.TestCase):
self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')
- # Test _PyUnicode_GetUTF8Buffer()
- @support.cpython_only
- def test_getutf8buffer(self):
- from _testcapi import unicode_getutf8buffer, unicode_test_getutf8buffer
-
- # Run tests wrtten in C. Raise an error when test failed.
- unicode_test_getutf8buffer()
-
- ascii_ = "foo"
- bmp = '\u0100'
- bmp2 = '\uffff'
- nonbmp = chr(0x10ffff)
- surrogates = 'a\ud800b\udfffc'
-
- self.assertEqual(unicode_getutf8buffer(ascii_), b'foo')
- self.assertEqual(unicode_getutf8buffer(bmp), b'\xc4\x80')
- self.assertEqual(unicode_getutf8buffer(bmp2), b'\xef\xbf\xbf')
- self.assertEqual(unicode_getutf8buffer(nonbmp), b'\xf4\x8f\xbf\xbf')
- self.assertRaises(UnicodeEncodeError, unicode_getutf8buffer, surrogates)
- self.assertEqual(unicode_getutf8buffer(surrogates, "surrogatepass"),
- b'a\xed\xa0\x80b\xed\xbf\xbfc')
-
# Test PyUnicode_AsUTF8()
@support.cpython_only
def test_asutf8(self):
diff --git a/Misc/NEWS.d/next/C API/2019-12-19-21-19-53.bpo-39087.l4A11-.rst b/Misc/NEWS.d/next/C API/2019-12-19-21-19-53.bpo-39087.l4A11-.rst
deleted file mode 100644
index 2c2c85d..0000000
--- a/Misc/NEWS.d/next/C API/2019-12-19-21-19-53.bpo-39087.l4A11-.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-Add new ``_PyUnicode_GetUTF8Buffer`` private API to get UTF-8 encode of the
-unicode object without cache or extra allocation.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 09b7706..3cc5586 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1968,216 +1968,6 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
}
static PyObject *
-unicode_getutf8buffer(PyObject *self, PyObject *args)
-{
- PyObject *unicode;
- const char *errors = NULL;
- if(!PyArg_ParseTuple(args, "O|s", &unicode, &errors)) {
- return NULL;
- }
-
- Py_buffer buffer;
- if (_PyUnicode_GetUTF8Buffer(unicode, errors, &buffer) < 0) {
- return NULL;
- }
-
- assert(buffer.obj != NULL);
- assert(buffer.obj == unicode || PyBytes_CheckExact(buffer.obj));
-
- PyObject *result = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
- PyBuffer_Release(&buffer);
- return result;
-}
-
-static PyObject *
-unicode_test_getutf8buffer(PyObject *self, PyObject *Py_UNUSED(ignored))
-{
- Py_buffer buf;
-
- // Test 1: ASCII string
- PyObject *str = PyUnicode_FromString("hello");
- if (str == NULL) {
- return NULL;
- }
- Py_ssize_t refcnt = Py_REFCNT(str);
-
- // _PyUnicode_GetUTF8Buffer() must not fail for ASCII string.
- int ret = _PyUnicode_GetUTF8Buffer(str, NULL, &buf);
- assert(ret == 0);
-
- if (buf.obj != str) {
- PyErr_Format(TestError,
- "buf.obj must be equal to str. (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != PyUnicode_GET_LENGTH(str)) {
- PyErr_Format(TestError,
- "buf.len must be equal to len(str). (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[5] == '\0');
-
- if ((Py_UCS1*)buf.buf != PyUnicode_1BYTE_DATA(str)) {
- PyErr_Format(TestError,
- "buf.buf must be equal to PyUnicode_1BYTE_DATA(str). (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (refcnt + 1 != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt + 1, Py_REFCNT(str),
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt, Py_REFCNT(str),
- __FILE__, __LINE__);
- Py_DECREF(str);
- return NULL;
- }
-
- Py_DECREF(str);
-
- // Test 2: non-ASCII string
-
- // "hello" in Japanese. len(str)==5, len(str.encode()) == 15.
- str = PyUnicode_FromString("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf");
- if (str == NULL) {
- return NULL;
- }
- refcnt = Py_REFCNT(str);
- assert(PyUnicode_GET_LENGTH(str) == 5);
-
- if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
- Py_DECREF(str);
- if (!PyErr_Occurred()) {
- PyErr_Format(TestError,
- "_PyUnicode_GetUTF8Buffer() returned nonzero "
- "without exception set. (%s:%d)",
- __FILE__, __LINE__);
- }
- return NULL;
- }
-
- if (!PyBytes_CheckExact(buf.obj)) {
- PyErr_Format(TestError,
- "buf.obj must be a bytes object, got %R (%s:%d)",
- buf.obj, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != 15) {
- PyErr_Format(TestError,
- "Expected buf.len == 15, actual %zd (%s:%d)",
- buf.len, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[15] == '\0');
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str) must not be changed. (%s:%d)",
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- // Test 3: There is a UTF-8 cache
- // Reuse str of the previoss test.
-
- const char *cache = PyUnicode_AsUTF8(str);
- if (cache == NULL) {
- return NULL;
- }
-
- if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
- Py_DECREF(str);
- if (!PyErr_Occurred()) {
- PyErr_Format(TestError,
- "_PyUnicode_GetUTF8Buffer() returned nonzero "
- "without exception set. (%s:%d)",
- __FILE__, __LINE__);
- }
- return NULL;
- }
-
- if (buf.obj != str) {
- PyErr_Format(TestError,
- "buf.obj must be equal to str. (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.buf != cache) {
- PyErr_Format(TestError,
- "buf.buf must be equal to the UTF-8 cache (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != 15) {
- PyErr_Format(TestError,
- "Expected buf.len == 15, actual %zd (%s:%d)",
- buf.len, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[15] == '\0');
-
- if (refcnt + 1 != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt + 1, Py_REFCNT(str),
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt, Py_REFCNT(str),
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- Py_DECREF(str);
- Py_RETURN_NONE;
-}
-
-static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
PyObject *str;
@@ -5602,8 +5392,6 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
- {"unicode_getutf8buffer", unicode_getutf8buffer, METH_VARARGS},
- {"unicode_test_getutf8buffer", unicode_test_getutf8buffer, METH_NOARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0fea435..3d99f11 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3991,41 +3991,6 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
}
-int
-_PyUnicode_GetUTF8Buffer(PyObject *unicode, const char *errors,
- Py_buffer *view)
-{
- if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
- return -1;
- }
- if (PyUnicode_READY(unicode) == -1) {
- return -1;
- }
-
- if (PyUnicode_UTF8(unicode) != NULL
- && Py_TYPE(unicode)->tp_as_buffer == NULL) {
- return PyBuffer_FillInfo(view, unicode,
- PyUnicode_UTF8(unicode),
- PyUnicode_UTF8_LENGTH(unicode),
- /* readonly */ 1, PyBUF_SIMPLE);
- }
-
- // Unlike PyUnicode_AsUTF8AndSize(), this function doesn't
- // create a UTF-8 cache for speed and efficiency.
- PyObject *bytes = _PyUnicode_AsUTF8String(unicode, errors);
- if (bytes == NULL) {
- return -1;
- }
- assert(PyBytes_CheckExact(bytes));
- if (PyObject_GetBuffer(bytes, view, PyBUF_SIMPLE) < 0) {
- Py_DECREF(bytes);
- return -1;
- }
- return 0;
-}
-
-
static int unicode_fill_utf8(PyObject *unicode);
const char *