summaryrefslogtreecommitdiffstats
path: root/Objects/listobject.c
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2021-10-25 03:27:24 (GMT)
committerGitHub <noreply@github.com>2021-10-25 03:27:24 (GMT)
commit51ed2c56a1852cd6b09c85ba81312dc9782772ce (patch)
tree8d6cbd4b4a443e5be911579b6ff9f2d49ee65711 /Objects/listobject.c
parent07236d562e59c6650227be18fa6ffc66b18d4741 (diff)
downloadcpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.zip
cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.gz
cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.bz2
bpo-45530: speed listobject.c's unsafe_tuple_compare() (GH-29076)
Keep track of whether unsafe_tuple_compare() calls are resolved by the very first tuple elements, and adjust strategy accordingly. This can significantly cut the number of calls made to the full-blown PyObject_RichCompareBool(), and especially when duplicates are rare. Co-authored-by: Ɓukasz Langa <lukasz@langa.pl>
Diffstat (limited to 'Objects/listobject.c')
-rw-r--r--Objects/listobject.c77
1 files changed, 64 insertions, 13 deletions
diff --git a/Objects/listobject.c b/Objects/listobject.c
index e0cae87..05743cd 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1220,6 +1220,13 @@ struct s_MergeState {
* of tuples. It may be set to safe_object_compare, but the idea is that hopefully
* we can assume more, and use one of the special-case compares. */
int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
+
+ /* Used by unsafe_tuple_compare to record whether the very first tuple
+ * elements resolved the last comparison attempt. If so, next time a
+ * method that may avoid PyObject_RichCompareBool() entirely is tried.
+ * 0 for false, 1 for true.
+ */
+ int first_tuple_items_resolved_it;
};
/* binarysort is the best method for sorting small arrays: it does
@@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
* using the same pre-sort check as we use for ms->key_compare,
* but run on the list [x[0] for x in L]. This allows us to optimize compares
* on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
- * that most tuple compares don't involve x[1:]. */
+ * that most tuple compares don't involve x[1:].
+ * However, that may not be right. When it is right, we can win by calling the
+ * relatively cheap ms->tuple_elem_compare on the first pair of elements, to
+ * see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done.
+ * Else we proceed as in the tuple compare, comparing the remaining pairs via
+ * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
+ * ever) that says "no, not equal!". Then, if we're still on the first pair,
+ * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
+ * Py_LT) finishes the job.
+ * In any case, ms->first_tuple_items_resolved_it keeps track of whether the
+ * most recent tuple comparison was resolved by the first pair. If so, the
+ * next attempt starts by trying the cheap tests on the first pair again, else
+ * PyObject_RichCompareBool(..., Py_EQ) is used from the start.
+ * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
+ * For example, that can return "almost immediately" if passed the same
+ * object twice (it special-cases object identity for Py_EQ), which can,
+ * potentially, be unboundedly faster than ms->tuple_elem_compare.
+ */
static int
unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
{
@@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
vt = (PyTupleObject *)v;
wt = (PyTupleObject *)w;
+ i = 0;
+ if (ms->first_tuple_items_resolved_it) {
+ /* See whether fast compares of the first elements settle it. */
+ k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms);
+ if (k) /* error, or v < w */
+ return k;
+ k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms);
+ if (k > 0) /* w < v */
+ return 0;
+ if (k < 0) /* error */
+ return -1;
+ /* We have
+ * not (v[0] < w[0]) and not (w[0] < v[0])
+ * which implies, for a total order, that the first elements are
+ * equal. So skip them in the loop.
+ */
+ i = 1;
+ ms->first_tuple_items_resolved_it = 0;
+ }
+ /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
+ * at the end of the `if` block just above.
+ */
+ assert(! ms->first_tuple_items_resolved_it);
vlen = Py_SIZE(vt);
wlen = Py_SIZE(wt);
-
- for (i = 0; i < vlen && i < wlen; i++) {
+ for (; i < vlen && i < wlen; i++) {
k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ);
+ if (!k) { /* not equal */
+ if (i) {
+ return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i],
+ Py_LT);
+ }
+ else {
+ ms->first_tuple_items_resolved_it = 1;
+ return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0],
+ ms);
+ }
+ }
if (k < 0)
return -1;
- if (!k)
- break;
}
+ /* all equal until we fell off the end */
+ return vlen < wlen;
- if (i >= vlen || i >= wlen)
- return vlen < wlen;
-
- if (i == 0)
- return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
- else
- return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
-}
+ }
/* An adaptive, stable, natural mergesort. See listsort.txt.
* Returns Py_None on success, NULL on error. Even in case of error, the
@@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
}
ms.key_compare = unsafe_tuple_compare;
+ ms.first_tuple_items_resolved_it = 1; /* be optimistic */
}
}
/* End of pre-sort check: ms is now set properly! */