diff options
author | Victor Stinner <vstinner@redhat.com> | 2019-03-19 23:05:51 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-19 23:05:51 (GMT) |
commit | c70ab02df2894c34da2223fc3798c0404b41fd79 (patch) | |
tree | f97dfba89426b0fffa50b103a0afd213f42bca4a | |
parent | fd23cfa464ab93273370475900819c1ea37c852f (diff) | |
download | cpython-c70ab02df2894c34da2223fc3798c0404b41fd79.zip cpython-c70ab02df2894c34da2223fc3798c0404b41fd79.tar.gz cpython-c70ab02df2894c34da2223fc3798c0404b41fd79.tar.bz2 |
bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)
No longer limit repr(structseq) to 512 bytes. Use _PyUnicodeWriter
for better performance and to write Unicode directly rather than
encoding the repr() value to UTF-8 and then decoding it from UTF-8.
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst | 1 | ||||
-rw-r--r-- | Objects/structseq.c | 126 |
2 files changed, 69 insertions, 58 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
new file mode 100644
index 0000000..206de56
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
@@ -0,0 +1 @@
+repr(structseq) is no longer limited to 512 bytes.
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 1c37845..5278313 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -168,78 +168,88 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
 static PyObject *
 structseq_repr(PyStructSequence *obj)
 {
-    /* buffer and type size were chosen well considered. */
-#define REPR_BUFFER_SIZE 512
-#define TYPE_MAXSIZE 100
-
     PyTypeObject *typ = Py_TYPE(obj);
-    Py_ssize_t i;
-    int removelast = 0;
-    Py_ssize_t len;
-    char buf[REPR_BUFFER_SIZE];
-    char *endofbuf, *pbuf = buf;
-
-    /* pointer to end of writeable buffer; safes space for "...)\0" */
-    endofbuf= &buf[REPR_BUFFER_SIZE-5];
-
-    /* "typename(", limited to TYPE_MAXSIZE */
-    len = strlen(typ->tp_name);
-    len = Py_MIN(len, TYPE_MAXSIZE);
-    memcpy(pbuf, typ->tp_name, len);
-    pbuf += len;
-    *pbuf++ = '(';
-
-    for (i=0; i < VISIBLE_SIZE(obj); i++) {
-        PyObject *val, *repr;
-        const char *cname, *crepr;
-
-        cname = typ->tp_members[i].name;
-        if (cname == NULL) {
+    _PyUnicodeWriter writer;
+
+    /* Write "typename(" */
+    PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name,
+                                               strlen(typ->tp_name),
+                                               NULL);
+    if (type_name == NULL) {
+        goto error;
+    }
+
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    /* count 5 characters per item: "x=1, " */
+    writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1
+                         + VISIBLE_SIZE(obj) * 5 + 1);
+
+    if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) {
+        Py_DECREF(type_name);
+        goto error;
+    }
+    Py_DECREF(type_name);
+
+    if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
+        goto error;
+    }
+
+    for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) {
+        if (i > 0) {
+            /* Write ", " */
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
+                goto error;
+            }
+        }
+
+        /* Write "name=repr" */
+        const char *name_utf8 = typ->tp_members[i].name;
+        if (name_utf8 == NULL) {
             PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
                          " for type %.500s", i, typ->tp_name);
-            return NULL;
+            goto error;
         }
-        val = PyStructSequence_GET_ITEM(obj, i);
-        repr = PyObject_Repr(val);
-        if (repr == NULL)
-            return NULL;
-        crepr = PyUnicode_AsUTF8(repr);
-        if (crepr == NULL) {
-            Py_DECREF(repr);
-            return NULL;
+
+        PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL);
+        if (name == NULL) {
+            goto error;
+        }
+        if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
+            Py_DECREF(name);
+            goto error;
         }
+        Py_DECREF(name);
 
-        /* + 3: keep space for "=" and ", " */
-        len = strlen(cname) + strlen(crepr) + 3;
-        if ((pbuf+len) <= endofbuf) {
-            strcpy(pbuf, cname);
-            pbuf += strlen(cname);
-            *pbuf++ = '=';
-            strcpy(pbuf, crepr);
-            pbuf += strlen(crepr);
-            *pbuf++ = ',';
-            *pbuf++ = ' ';
-            removelast = 1;
-            Py_DECREF(repr);
+        if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
+            goto error;
         }
-        else {
-            strcpy(pbuf, "...");
-            pbuf += 3;
-            removelast = 0;
+
+        PyObject *value = PyStructSequence_GET_ITEM(obj, i);
+        assert(value != NULL);
+        PyObject *repr = PyObject_Repr(value);
+        if (repr == NULL) {
+            goto error;
+        }
+        if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) {
             Py_DECREF(repr);
-            break;
+            goto error;
         }
+        Py_DECREF(repr);
     }
-    if (removelast) {
-        /* overwrite last ", " */
-        pbuf-=2;
+
+    if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
+        goto error;
     }
-    *pbuf++ = ')';
-    *pbuf = '\0';
 
-    return PyUnicode_FromString(buf);
+    return _PyUnicodeWriter_Finish(&writer);
+
+error:
+    _PyUnicodeWriter_Dealloc(&writer);
+    return NULL;
 }
+
 
 static PyObject *
 structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
 {