bpo-45530: speed listobject.c's unsafe_tuple_compare() (GH-29076)

Keep track of whether unsafe_tuple_compare() calls are resolved by the very first tuple elements, and adjust strategy accordingly. This can significantly cut the number of calls made to the full-blown PyObject_RichCompareBool(), and especially when duplicates are rare. Co-authored-by: Łukasz Langa <lukasz@langa.pl>
author: Tim Peters <tim.peters@gmail.com> 2021-10-25 03:27:24 (GMT)
committer: GitHub <noreply@github.com> 2021-10-25 03:27:24 (GMT)
commit: 51ed2c56a1852cd6b09c85ba81312dc9782772ce (patch)
tree: 8d6cbd4b4a443e5be911579b6ff9f2d49ee65711 /Objects/listobject.c
parent: 07236d562e59c6650227be18fa6ffc66b18d4741 (diff)
download: cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.zip
cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.gz
cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.bz2
1 files changed, 64 insertions, 13 deletions
diff --git a/Objects/listobject.c b/Objects/listobject.c
index e0cae87..05743cd 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1220,6 +1220,13 @@ struct s_MergeState {
      * of tuples. It may be set to safe_object_compare, but the idea is that hopefully
      * we can assume more, and use one of the special-case compares. */
     int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
+
+    /* Used by unsafe_tuple_compare to record whether the very first tuple
+     * elements resolved the last comparison attempt. If so, next time a
+     * method that may avoid PyObject_RichCompareBool() entirely is tried.
+     * 0 for false, 1 for true.
+     */
+    int first_tuple_items_resolved_it;
 };
 
 /* binarysort is the best method for sorting small arrays: it does
@@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
  * using the same pre-sort check as we use for ms->key_compare,
  * but run on the list [x[0] for x in L]. This allows us to optimize compares
  * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
- * that most tuple compares don't involve x[1:]. */
+ * that most tuple compares don't involve x[1:].
+ * However, that may not be right. When it is right, we can win by calling the
+ * relatively cheap ms->tuple_elem_compare on the first pair of elements, to
+ * see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done.
+ * Else we proceed as in the tuple compare, comparing the remaining pairs via
+ * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
+ * ever) that says "no, not equal!". Then, if we're still on the first pair,
+ * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
+ * Py_LT) finishes the job.
+ * In any case, ms->first_tuple_items_resolved_it keeps track of whether the
+ * most recent tuple comparison was resolved by the first pair. If so, the
+ * next attempt starts by trying the cheap tests on the first pair again, else
+ * PyObject_RichCompareBool(..., Py_EQ) is used from the start.
+ * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
+ * For example, that can return "almost immediately" if passed the same
+ * object twice (it special-cases object identity for Py_EQ), which can,
+ * potentially, be unboundedly faster than ms->tuple_elem_compare.
+ */
 static int
 unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
 {
@@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
 
     vt = (PyTupleObject *)v;
     wt = (PyTupleObject *)w;
+    i = 0;
+    if (ms->first_tuple_items_resolved_it) {
+        /* See whether fast compares of the first elements settle it. */
+        k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms);
+        if (k) /* error, or v < w */
+            return k;
+        k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms);
+        if (k > 0) /* w < v */
+            return 0;
+        if (k < 0) /* error */
+            return -1;
+        /* We have
+         *     not (v[0] < w[0]) and not (w[0] < v[0])
+         * which implies, for a total order, that the first elements are
+         * equal. So skip them in the loop.
+         */
+        i = 1;
+        ms->first_tuple_items_resolved_it = 0;
+    }
+    /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
+     * at the end of the `if` block just above.
+     */
+    assert(! ms->first_tuple_items_resolved_it);
 
     vlen = Py_SIZE(vt);
     wlen = Py_SIZE(wt);
-
-    for (i = 0; i < vlen && i < wlen; i++) {
+    for (; i < vlen && i < wlen; i++) {
         k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ);
+        if (!k) { /* not equal */
+            if (i) {
+                return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i],
+                                                Py_LT);
+            }
+            else {
+                ms->first_tuple_items_resolved_it = 1;
+                return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0],
+                                              ms);
+            }
+        }
         if (k < 0)
             return -1;
-        if (!k)
-            break;
     }
+    /* all equal until we fell off the end */
+    return vlen < wlen;
 
-    if (i >= vlen || i >= wlen)
-        return vlen < wlen;
-
-    if (i == 0)
-        return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
-    else
-        return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
-}
+ }
 
 /* An adaptive, stable, natural mergesort.  See listsort.txt.
  * Returns Py_None on success, NULL on error.  Even in case of error, the
@@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
             }
 
             ms.key_compare = unsafe_tuple_compare;
+            ms.first_tuple_items_resolved_it = 1; /* be optimistic */
         }
     }
     /* End of pre-sort check: ms is now set properly! */
author	Tim Peters <tim.peters@gmail.com>	2021-10-25 03:27:24 (GMT)
committer	GitHub <noreply@github.com>	2021-10-25 03:27:24 (GMT)
commit	51ed2c56a1852cd6b09c85ba81312dc9782772ce (patch)
tree	8d6cbd4b4a443e5be911579b6ff9f2d49ee65711 /Objects/listobject.c
parent	07236d562e59c6650227be18fa6ffc66b18d4741 (diff)
download	cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.zip cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.gz cpython-51ed2c56a1852cd6b09c85ba81312dc9782772ce.tar.bz2