summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Victor Stinner <vstinner@redhat.com> 2019-03-19 23:05:51 (GMT)
committer GitHub <noreply@github.com> 2019-03-19 23:05:51 (GMT)
commit c70ab02df2894c34da2223fc3798c0404b41fd79 (patch)
tree f97dfba89426b0fffa50b103a0afd213f42bca4a
parent fd23cfa464ab93273370475900819c1ea37c852f (diff)
downloadcpython-c70ab02df2894c34da2223fc3798c0404b41fd79.zip
cpython-c70ab02df2894c34da2223fc3798c0404b41fd79.tar.gz
cpython-c70ab02df2894c34da2223fc3798c0404b41fd79.tar.bz2
bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)
No longer limit repr(structseq) to 512 bytes. Use _PyUnicodeWriter for better performance and to write Unicode directly, rather than encoding the repr() value to UTF-8 and then decoding it back from UTF-8.
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst 1
-rw-r--r-- Objects/structseq.c 126
2 files changed, 69 insertions, 58 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
new file mode 100644
index 0000000..206de56
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
@@ -0,0 +1 @@
+repr(structseq) is no longer limited to 512 bytes.
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 1c37845..5278313 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -168,78 +168,88 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
static PyObject *
structseq_repr(PyStructSequence *obj)
{
- /* buffer and type size were chosen well considered. */
-#define REPR_BUFFER_SIZE 512
-#define TYPE_MAXSIZE 100
-
PyTypeObject *typ = Py_TYPE(obj);
- Py_ssize_t i;
- int removelast = 0;
- Py_ssize_t len;
- char buf[REPR_BUFFER_SIZE];
- char *endofbuf, *pbuf = buf;
-
- /* pointer to end of writeable buffer; safes space for "...)\0" */
- endofbuf= &buf[REPR_BUFFER_SIZE-5];
-
- /* "typename(", limited to TYPE_MAXSIZE */
- len = strlen(typ->tp_name);
- len = Py_MIN(len, TYPE_MAXSIZE);
- memcpy(pbuf, typ->tp_name, len);
- pbuf += len;
- *pbuf++ = '(';
-
- for (i=0; i < VISIBLE_SIZE(obj); i++) {
- PyObject *val, *repr;
- const char *cname, *crepr;
-
- cname = typ->tp_members[i].name;
- if (cname == NULL) {
+ _PyUnicodeWriter writer;
+
+ /* Write "typename(" */
+ PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name,
+ strlen(typ->tp_name),
+ NULL);
+ if (type_name == NULL) {
+ goto error;
+ }
+
+ _PyUnicodeWriter_Init(&writer);
+ writer.overallocate = 1;
+ /* count 5 characters per item: "x=1, " */
+ writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1
+ + VISIBLE_SIZE(obj) * 5 + 1);
+
+ if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) {
+ Py_DECREF(type_name);
+ goto error;
+ }
+ Py_DECREF(type_name);
+
+ if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
+ goto error;
+ }
+
+ for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) {
+ if (i > 0) {
+ /* Write ", " */
+ if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
+ goto error;
+ }
+ }
+
+ /* Write "name=repr" */
+ const char *name_utf8 = typ->tp_members[i].name;
+ if (name_utf8 == NULL) {
PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
" for type %.500s", i, typ->tp_name);
- return NULL;
+ goto error;
}
- val = PyStructSequence_GET_ITEM(obj, i);
- repr = PyObject_Repr(val);
- if (repr == NULL)
- return NULL;
- crepr = PyUnicode_AsUTF8(repr);
- if (crepr == NULL) {
- Py_DECREF(repr);
- return NULL;
+
+ PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL);
+ if (name == NULL) {
+ goto error;
+ }
+ if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
+ Py_DECREF(name);
+ goto error;
}
+ Py_DECREF(name);
- /* + 3: keep space for "=" and ", " */
- len = strlen(cname) + strlen(crepr) + 3;
- if ((pbuf+len) <= endofbuf) {
- strcpy(pbuf, cname);
- pbuf += strlen(cname);
- *pbuf++ = '=';
- strcpy(pbuf, crepr);
- pbuf += strlen(crepr);
- *pbuf++ = ',';
- *pbuf++ = ' ';
- removelast = 1;
- Py_DECREF(repr);
+ if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
+ goto error;
}
- else {
- strcpy(pbuf, "...");
- pbuf += 3;
- removelast = 0;
+
+ PyObject *value = PyStructSequence_GET_ITEM(obj, i);
+ assert(value != NULL);
+ PyObject *repr = PyObject_Repr(value);
+ if (repr == NULL) {
+ goto error;
+ }
+ if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) {
Py_DECREF(repr);
- break;
+ goto error;
}
+ Py_DECREF(repr);
}
- if (removelast) {
- /* overwrite last ", " */
- pbuf-=2;
+
+ if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
+ goto error;
}
- *pbuf++ = ')';
- *pbuf = '\0';
- return PyUnicode_FromString(buf);
+ return _PyUnicodeWriter_Finish(&writer);
+
+error:
+ _PyUnicodeWriter_Dealloc(&writer);
+ return NULL;
}
+
static PyObject *
structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
{