diff options
-rw-r--r-- | Include/internal/pycore_bytesobject.h | 13 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2022-03-19-21-50-59.bpo-47070.wPcsQh.rst | 3 | ||||
-rw-r--r-- | Modules/arraymodule.c | 49 | ||||
-rw-r--r-- | Objects/bytearrayobject.c | 61 | ||||
-rw-r--r-- | Objects/bytesobject.c | 46 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 12 |
6 files changed, 81 insertions, 103 deletions
diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index b806160..9173a4f 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -33,6 +33,19 @@ _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack, const char *needle, Py_ssize_t len_needle, Py_ssize_t offset); + +/** Helper function to implement the repeat and inplace repeat methods on a buffer + * + * len_dest is assumed to be an integer multiple of len_src. + * If src equals dest, then assume the operation is inplace. + * + * This method repeately doubles the number of bytes copied to reduce + * the number of invocations of memcpy. + */ +PyAPI_FUNC(void) +_PyBytes_Repeat(char* dest, Py_ssize_t len_dest, + const char* src, Py_ssize_t len_src); + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-19-21-50-59.bpo-47070.wPcsQh.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-19-21-50-59.bpo-47070.wPcsQh.rst new file mode 100644 index 0000000..568974f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-19-21-50-59.bpo-47070.wPcsQh.rst @@ -0,0 +1,3 @@ +Improve performance of ``array_inplace_repeat`` by reducing the number of invocations of ``memcpy``. +Refactor the ``repeat`` and inplace ``repeat`` methods of ``array``, ``bytes``, ``bytearray`` +and ``unicodeobject`` to use the common ``_PyBytes_Repeat``. diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 18991f8..a04e6a4 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -10,6 +10,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_bytesobject.h" // _PyBytes_Repeat #include "structmember.h" // PyMemberDef #include <stddef.h> // offsetof() #include <stddef.h> @@ -910,34 +911,24 @@ static PyObject * array_repeat(arrayobject *a, Py_ssize_t n) { array_state *state = find_array_state_by_type(Py_TYPE(a)); - Py_ssize_t size; - arrayobject *np; - Py_ssize_t oldbytes, newbytes; + if (n < 0) n = 0; - if ((Py_SIZE(a) != 0) && (n > PY_SSIZE_T_MAX / Py_SIZE(a))) { + const Py_ssize_t array_length = Py_SIZE(a); + if ((array_length != 0) && (n > PY_SSIZE_T_MAX / array_length)) { return PyErr_NoMemory(); } - size = Py_SIZE(a) * n; - np = (arrayobject *) newarrayobject(state->ArrayType, size, a->ob_descr); + Py_ssize_t size = array_length * n; + arrayobject* np = (arrayobject *) newarrayobject(state->ArrayType, size, a->ob_descr); if (np == NULL) return NULL; if (size == 0) return (PyObject *)np; - oldbytes = Py_SIZE(a) * a->ob_descr->itemsize; - newbytes = oldbytes * n; - /* this follows the code in unicode_repeat */ - if (oldbytes == 1) { - memset(np->ob_item, a->ob_item[0], newbytes); - } else { - Py_ssize_t done = oldbytes; - memcpy(np->ob_item, a->ob_item, oldbytes); - while (done < newbytes) { - Py_ssize_t ncopy = (done <= newbytes-done) ? done : newbytes-done; - memcpy(np->ob_item+done, np->ob_item, ncopy); - done += ncopy; - } - } + + const Py_ssize_t oldbytes = array_length * a->ob_descr->itemsize; + const Py_ssize_t newbytes = oldbytes * n; + _PyBytes_Repeat(np->ob_item, newbytes, a->ob_item, oldbytes); + return (PyObject *)np; } @@ -1075,27 +1066,23 @@ array_inplace_concat(arrayobject *self, PyObject *bb) static PyObject * array_inplace_repeat(arrayobject *self, Py_ssize_t n) { - char *items, *p; - Py_ssize_t size, i; + const Py_ssize_t array_size = Py_SIZE(self); - if (Py_SIZE(self) > 0) { + if (array_size > 0 && n != 1 ) { if (n < 0) n = 0; if ((self->ob_descr->itemsize != 0) && - (Py_SIZE(self) > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) { + (array_size > PY_SSIZE_T_MAX / self->ob_descr->itemsize)) { return PyErr_NoMemory(); } - size = Py_SIZE(self) * self->ob_descr->itemsize; + Py_ssize_t size = array_size * self->ob_descr->itemsize; if (n > 0 && size > PY_SSIZE_T_MAX / n) { return PyErr_NoMemory(); } - if (array_resize(self, n * Py_SIZE(self)) == -1) + if (array_resize(self, n * array_size) == -1) return NULL; - items = p = self->ob_item; - for (i = 1; i < n; i++) { - p += size; - memcpy(p, items, size); - } + + _PyBytes_Repeat(self->ob_item, n*size, self->ob_item, size); } Py_INCREF(self); return (PyObject *)self; diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 74267cf..3849337 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" +#include "pycore_bytesobject.h" #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_strhex.h" // _Py_strhex_with_sep() #include "pycore_long.h" // _PyLong_FromUnsignedChar() @@ -319,37 +320,16 @@ bytearray_iconcat(PyByteArrayObject *self, PyObject *other) static PyObject * bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count) { - PyByteArrayObject *result; - Py_ssize_t mysize; - Py_ssize_t size; - const char *buf; - if (count < 0) count = 0; - mysize = Py_SIZE(self); + const Py_ssize_t mysize = Py_SIZE(self); if (count > 0 && mysize > PY_SSIZE_T_MAX / count) return PyErr_NoMemory(); - size = mysize * count; - result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size); - buf = PyByteArray_AS_STRING(self); + Py_ssize_t size = mysize * count; + PyByteArrayObject* result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size); + const char* buf = PyByteArray_AS_STRING(self); if (result != NULL && size != 0) { - if (mysize == 1) - memset(result->ob_bytes, buf[0], size); - else { - Py_ssize_t i, j; - - i = 0; - if (i < size) { - memcpy(result->ob_bytes, buf, mysize); - i = mysize; - } - // repeatedly double the number of bytes copied - while (i < size) { - j = Py_MIN(i, size - i); - memcpy(result->ob_bytes + i, result->ob_bytes, j); - i += j; - } - } + _PyBytes_Repeat(result->ob_bytes, size, buf, mysize); } return (PyObject *)result; } @@ -357,33 +337,22 @@ bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count) static PyObject * bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count) { - Py_ssize_t mysize; - Py_ssize_t size; - char *buf; - if (count < 0) count = 0; - mysize = Py_SIZE(self); + else if (count == 1) { + Py_INCREF(self); + return (PyObject*)self; + } + + const Py_ssize_t mysize = Py_SIZE(self); if (count > 0 && mysize > PY_SSIZE_T_MAX / count) return PyErr_NoMemory(); - size = mysize * count; + const Py_ssize_t size = mysize * count; if (PyByteArray_Resize((PyObject *)self, size) < 0) return NULL; - buf = PyByteArray_AS_STRING(self); - if (mysize == 1) - memset(buf, buf[0], size); - else { - Py_ssize_t i, j; - - i = mysize; - // repeatedly double the number of bytes copied - while (i < size) { - j = Py_MIN(i, size - i); - memcpy(buf + i, buf, j); - i += j; - } - } + char* buf = PyByteArray_AS_STRING(self); + _PyBytes_Repeat(buf, size, buf, mysize); Py_INCREF(self); return (PyObject *)self; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 56c941a..78c42c2 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4,7 +4,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytesobject.h" // _PyBytes_Find() +#include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat() #include "pycore_bytes_methods.h" // _Py_bytes_startswith() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_format.h" // F_LJUST @@ -1421,8 +1421,6 @@ bytes_concat(PyObject *a, PyObject *b) static PyObject * bytes_repeat(PyBytesObject *a, Py_ssize_t n) { - Py_ssize_t i; - Py_ssize_t j; Py_ssize_t size; PyBytesObject *op; size_t nbytes; @@ -1457,20 +1455,9 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS op->ob_shash = -1; _Py_COMP_DIAG_POP op->ob_sval[size] = '\0'; - if (Py_SIZE(a) == 1 && n > 0) { - memset(op->ob_sval, a->ob_sval[0] , n); - return (PyObject *) op; - } - i = 0; - if (i < size) { - memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a)); - i = Py_SIZE(a); - } - while (i < size) { - j = (i <= size-i) ? i : size-i; - memcpy(op->ob_sval+i, op->ob_sval, j); - i += j; - } + + _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a)); + return (PyObject *) op; } @@ -3528,3 +3515,28 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr, return str; } + + +void +_PyBytes_Repeat(char* dest, Py_ssize_t len_dest, + const char* src, Py_ssize_t len_src) +{ + if (len_dest == 0) { + return; + } + if (len_src == 1) { + memset(dest, src[0], len_dest); + } + else { + if (src != dest) { + memcpy(dest, src, len_src); + } + Py_ssize_t copied = len_src; + while (copied < len_dest) { + Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied); + memcpy(dest + copied, dest, bytes_to_copy); + copied += bytes_to_copy; + } + } +} + diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ce3ebce..72f9245 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -42,6 +42,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_atomic_funcs.h" // _Py_atomic_size_get() +#include "pycore_bytesobject.h" // _PyBytes_Repeat() #include "pycore_bytes_methods.h" // _Py_bytes_lower() #include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() @@ -12782,17 +12783,10 @@ unicode_repeat(PyObject *str, Py_ssize_t len) } } else { - /* number of characters copied this far */ - Py_ssize_t done = PyUnicode_GET_LENGTH(str); Py_ssize_t char_size = PyUnicode_KIND(str); char *to = (char *) PyUnicode_DATA(u); - memcpy(to, PyUnicode_DATA(str), - PyUnicode_GET_LENGTH(str) * char_size); - while (done < nchars) { - n = (done <= nchars-done) ? done : nchars-done; - memcpy(to + (done * char_size), to, n * char_size); - done += n; - } + _PyBytes_Repeat(to, nchars * char_size, PyUnicode_DATA(str), + PyUnicode_GET_LENGTH(str) * char_size); } assert(_PyUnicode_CheckConsistency(u, 1)); |