diff options
author | Tim Peters <tim.peters@gmail.com> | 2021-10-25 03:27:24 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-25 03:27:24 (GMT) |
commit | 51ed2c56a1852cd6b09c85ba81312dc9782772ce (patch) | |
tree | 8d6cbd4b4a443e5be911579b6ff9f2d49ee65711 /Objects/listobject.c | |
parent | 07236d562e59c6650227be18fa6ffc66b18d4741 (diff) | |
download | cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.zip cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.gz cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.bz2 |
bpo-45530: speed listobject.c's unsafe_tuple_compare() (GH-29076)
Keep track of whether unsafe_tuple_compare() calls are resolved by the very
first tuple elements, and adjust strategy accordingly. This can significantly
cut the number of calls made to the full-blown PyObject_RichCompareBool(),
and especially when duplicates are rare.
Co-authored-by: Ćukasz Langa <lukasz@langa.pl>
Diffstat (limited to 'Objects/listobject.c')
-rw-r--r-- | Objects/listobject.c | 77 |
1 files changed, 64 insertions, 13 deletions
diff --git a/Objects/listobject.c b/Objects/listobject.c index e0cae87..05743cd 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1220,6 +1220,13 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Used by unsafe_tuple_compare to record whether the very first tuple + * elements resolved the last comparison attempt. If so, next time a + * method that may avoid PyObject_RichCompareBool() entirely is tried. + * 0 for false, 1 for true. + */ + int first_tuple_items_resolved_it; }; /* binarysort is the best method for sorting small arrays: it does @@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms) * using the same pre-sort check as we use for ms->key_compare, * but run on the list [x[0] for x in L]. This allows us to optimize compares * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is - * that most tuple compares don't involve x[1:]. */ + * that most tuple compares don't involve x[1:]. + * However, that may not be right. When it is right, we can win by calling the + * relatively cheap ms->tuple_elem_compare on the first pair of elements, to + * see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done. + * Else we proceed as in the tuple compare, comparing the remaining pairs via + * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if + * ever) that says "no, not equal!". Then, if we're still on the first pair, + * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(..., + * Py_LT) finishes the job. + * In any case, ms->first_tuple_items_resolved_it keeps track of whether the + * most recent tuple comparison was resolved by the first pair. If so, the + * next attempt starts by trying the cheap tests on the first pair again, else + * PyObject_RichCompareBool(..., Py_EQ) is used from the start. + * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper! + * For example, that can return "almost immediately" if passed the same + * object twice (it special-cases object identity for Py_EQ), which can, + * potentially, be unboundedly faster than ms->tuple_elem_compare. + */ static int unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) { @@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; + i = 0; + if (ms->first_tuple_items_resolved_it) { + /* See whether fast compares of the first elements settle it. */ + k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); + if (k) /* error, or v < w */ + return k; + k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms); + if (k > 0) /* w < v */ + return 0; + if (k < 0) /* error */ + return -1; + /* We have + * not (v[0] < w[0]) and not (w[0] < v[0]) + * which implies, for a total order, that the first elements are + * equal. So skip them in the loop. + */ + i = 1; + ms->first_tuple_items_resolved_it = 0; + } + /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0 + * at the end of the `if` block just above. + */ + assert(! ms->first_tuple_items_resolved_it); vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); - - for (i = 0; i < vlen && i < wlen; i++) { + for (; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); + if (!k) { /* not equal */ + if (i) { + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], + Py_LT); + } + else { + ms->first_tuple_items_resolved_it = 1; + return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], + ms); + } + } if (k < 0) return -1; - if (!k) - break; } + /* all equal until we fell off the end */ + return vlen < wlen; - if (i >= vlen || i >= wlen) - return vlen < wlen; - - if (i == 0) - return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); - else - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); -} + } /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. Even in case of error, the @@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) } ms.key_compare = unsafe_tuple_compare; + ms.first_tuple_items_resolved_it = 1; /* be optimistic */ } } /* End of pre-sort check: ms is now set properly! */ |