From 20b0f87e1d031390f5b65e28b0026c42f53abe5d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 23 Jun 2013 15:44:33 -0700 Subject: Misc improvements to collections.deque() * Clarified comment on the impact of BLOCKLEN on deque_index (with a power-of-two, the division and modulo computations are done with a right-shift and bitwise-and). * Clarified comment on the overflow check to note that it is general and not just applicable the 64-bit builds. * In deque._rotate(), the "deque->" indirections are factored-out of the loop (loop invariant code motion), leaving the code cleaner looking and slightly faster. * In deque._rotate(), replaced the memcpy() with an equivalent loop. That saved the memcpy setup time and allowed the pointers to move in their natural leftward and rightward directions. See comparative timings at: http://pastebin.com/p0RJnT5N --- Modules/_collectionsmodule.c | 165 ++++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 72 deletions(-) diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index a23f714..b922475 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -8,9 +8,10 @@ */ /* The block length may be set to any number over 1. Larger numbers - * reduce the number of calls to the memory allocator but take more - * memory. Ideally, (BLOCKLEN+2) should be set to a multiple of the - * length of a cache line. + * reduce the number of calls to the memory allocator, give faster + * indexing and rotation, and reduce the link::data overhead ratio. + * If the block length is a power-of-two, we also get faster + * division/modulo computations during indexing. */ #define BLOCKLEN 62 @@ -47,8 +48,8 @@ typedef struct BLOCK { struct BLOCK *leftlink; - struct BLOCK *rightlink; PyObject *data[BLOCKLEN]; + struct BLOCK *rightlink; } block; #define MAXFREEBLOCKS 10 @@ -58,13 +59,8 @@ static block *freeblocks[MAXFREEBLOCKS]; static block * newblock(block *leftlink, block *rightlink, Py_ssize_t len) { block *b; - /* To prevent len from overflowing PY_SSIZE_T_MAX on 64-bit machines, we - * refuse to allocate new blocks if the current len is dangerously - * close. There is some extra margin to prevent spurious arithmetic - * overflows at various places. The following check ensures that - * the blocks allocated to the deque, in the worst case, can only - * have PY_SSIZE_T_MAX-2 entries in total. - */ + /* To prevent len from overflowing PY_SSIZE_T_MAX, we refuse to + * allocate new blocks if the current len is nearing overflow. */ if (len >= PY_SSIZE_T_MAX - 2*BLOCKLEN) { PyErr_SetString(PyExc_OverflowError, "cannot add more blocks to the deque"); @@ -413,7 +409,12 @@ deque_inplace_concat(dequeobject *deque, PyObject *other) static int _deque_rotate(dequeobject *deque, Py_ssize_t n) { - Py_ssize_t m, len=deque->len, halflen=len>>1; + block *leftblock = deque->leftblock; + block *rightblock = deque->rightblock; + Py_ssize_t leftindex = deque->leftindex; + Py_ssize_t rightindex = deque->rightindex; + Py_ssize_t len=deque->len, halflen=len>>1; + int rv = 0; if (len <= 1) return 0; @@ -429,76 +430,96 @@ _deque_rotate(dequeobject *deque, Py_ssize_t n) deque->state++; while (n > 0) { - if (deque->leftindex == 0) { - block *b = newblock(NULL, deque->leftblock, len); - if (b == NULL) - return -1; - assert(deque->leftblock->leftlink == NULL); - deque->leftblock->leftlink = b; - deque->leftblock = b; - deque->leftindex = BLOCKLEN; + if (leftindex == 0) { + block *b = newblock(NULL, leftblock, len); + if (b == NULL) { + rv = -1; + goto done; + } + assert(leftblock->leftlink == NULL); + leftblock->leftlink = b; + leftblock = b; + leftindex = BLOCKLEN; } - assert(deque->leftindex > 0); - - m = n; - if (m > deque->rightindex + 1) - m = deque->rightindex + 1; - if (m > deque->leftindex) - m = deque->leftindex; - assert (m > 0 && m <= len); - memcpy(&deque->leftblock->data[deque->leftindex - m], - &deque->rightblock->data[deque->rightindex + 1 - m], - m * sizeof(PyObject *)); - deque->rightindex -= m; - deque->leftindex -= m; - n -= m; - - if (deque->rightindex == -1) { - block *prevblock = deque->rightblock->leftlink; - assert(deque->rightblock != NULL); - assert(deque->leftblock != deque->rightblock); - freeblock(deque->rightblock); + assert(leftindex > 0); + + { + PyObject **src, **dest; + Py_ssize_t m = n; + + if (m > rightindex + 1) + m = rightindex + 1; + if (m > leftindex) + m = leftindex; + assert (m > 0 && m <= len); + src = &rightblock->data[rightindex]; + dest = &leftblock->data[leftindex - 1]; + rightindex -= m; + leftindex -= m; + n -= m; + while (m--) + *(dest--) = *(src--); + } + + if (rightindex == -1) { + block *prevblock = rightblock->leftlink; + assert(rightblock != NULL); + assert(leftblock != rightblock); + freeblock(rightblock); prevblock->rightlink = NULL; - deque->rightblock = prevblock; - deque->rightindex = BLOCKLEN - 1; + rightblock = prevblock; + rightindex = BLOCKLEN - 1; } } while (n < 0) { - if (deque->rightindex == BLOCKLEN - 1) { - block *b = newblock(deque->rightblock, NULL, len); - if (b == NULL) - return -1; - assert(deque->rightblock->rightlink == NULL); - deque->rightblock->rightlink = b; - deque->rightblock = b; - deque->rightindex = -1; + if (rightindex == BLOCKLEN - 1) { + block *b = newblock(rightblock, NULL, len); + if (b == NULL) { + rv = -1; + goto done; + } + assert(rightblock->rightlink == NULL); + rightblock->rightlink = b; + rightblock = b; + rightindex = -1; } - assert (deque->rightindex < BLOCKLEN - 1); - - m = -n; - if (m > BLOCKLEN - deque->leftindex) - m = BLOCKLEN - deque->leftindex; - if (m > BLOCKLEN - 1 - deque->rightindex) - m = BLOCKLEN - 1 - deque->rightindex; - assert (m > 0 && m <= len); - memcpy(&deque->rightblock->data[deque->rightindex + 1], - &deque->leftblock->data[deque->leftindex], - m * sizeof(PyObject *)); - deque->leftindex += m; - deque->rightindex += m; - n += m; - - if (deque->leftindex == BLOCKLEN) { - block *nextblock = deque->leftblock->rightlink; - assert(deque->leftblock != deque->rightblock); - freeblock(deque->leftblock); + assert (rightindex < BLOCKLEN - 1); + + { + PyObject **src, **dest; + Py_ssize_t m = -n; + + if (m > BLOCKLEN - leftindex) + m = BLOCKLEN - leftindex; + if (m > BLOCKLEN - 1 - rightindex) + m = BLOCKLEN - 1 - rightindex; + assert (m > 0 && m <= len); + src = &leftblock->data[leftindex]; + dest = &rightblock->data[rightindex + 1]; + leftindex += m; + rightindex += m; + n += m; + while (m--) + *(dest++) = *(src++); + } + + if (leftindex == BLOCKLEN) { + block *nextblock = leftblock->rightlink; + assert(leftblock != rightblock); + freeblock(leftblock); assert(nextblock != NULL); nextblock->leftlink = NULL; - deque->leftblock = nextblock; - deque->leftindex = 0; + leftblock = nextblock; + leftindex = 0; } } - return 0; +done: + deque->leftblock = leftblock; + deque->rightblock = rightblock; + deque->leftindex = leftindex; + deque->rightindex = rightindex; + + return rv; } static PyObject * -- cgit v0.12