diff options
author | Tim Peters <tim.peters@gmail.com> | 2022-01-12 18:55:02 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-12 18:55:02 (GMT) |
commit | fc05e6bfce5d5dfc23859e6f7862c1e707a12e42 (patch) | |
tree | 752b978b00117c5819083a8cb259744401afbb3a /Objects/longobject.c | |
parent | e2a9c8ef09cb7123d6b28852a323e6cc1f878b5b (diff) | |
download | cpython-fc05e6bfce5d5dfc23859e6f7862c1e707a12e42.zip cpython-fc05e6bfce5d5dfc23859e6f7862c1e707a12e42.tar.gz cpython-fc05e6bfce5d5dfc23859e6f7862c1e707a12e42.tar.bz2 |
bpo-46020: Optimize long_pow for the common case (GH-30555)
This cuts a bit of overhead by not initializing the table of small
odd powers unless it's needed for a large exponent.
Diffstat (limited to 'Objects/longobject.c')
-rw-r--r-- | Objects/longobject.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c index 2db8701..5d181aa 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4215,8 +4215,13 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) /* k-ary values. If the exponent is large enough, table is * precomputed so that table[i] == a**(2*i+1) % c for i in * range(EXP_TABLE_LEN). + * Note: this is uninitialzed stack trash: don't pay to set it to known + * values unless it's needed. Instead ensure that num_table_entries is + * set to the number of entries actually filled whenever a branch to the + * Error or Done labels is possible. */ - PyLongObject *table[EXP_TABLE_LEN] = {0}; + PyLongObject *table[EXP_TABLE_LEN]; + Py_ssize_t num_table_entries = 0; /* a, b, c = v, w, x */ CHECK_BINOP(v, w); @@ -4408,10 +4413,14 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) */ Py_INCREF(a); table[0] = a; + num_table_entries = 1; MULT(a, a, a2); /* table[i] == a**(2*i + 1) % c */ - for (i = 1; i < EXP_TABLE_LEN; ++i) + for (i = 1; i < EXP_TABLE_LEN; ++i) { + table[i] = NULL; /* must set to known value for MULT */ MULT(table[i-1], a2, table[i]); + ++num_table_entries; /* incremented iff MULT succeeded */ + } Py_CLEAR(a2); /* Repeatedly extract the next (no more than) EXP_WINDOW_SIZE bits @@ -4472,10 +4481,8 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) Py_CLEAR(z); /* fall through */ Done: - if (Py_SIZE(b) > HUGE_EXP_CUTOFF / PyLong_SHIFT) { - for (i = 0; i < EXP_TABLE_LEN; ++i) - Py_XDECREF(table[i]); - } + for (i = 0; i < num_table_entries; ++i) + Py_DECREF(table[i]); Py_DECREF(a); Py_DECREF(b); Py_XDECREF(c); |