summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
author: Tim Peters <tim.peters@gmail.com> 2022-01-25 01:06:00 (GMT)
committer: GitHub <noreply@github.com> 2022-01-25 01:06:00 (GMT)
commit: 7c26472d09548905d8c158b26b6a2b12de6cdc32 (patch)
tree: 282ffe1c879e53091e01aa2bd4bce7c7d1f369dd /Objects
parent: b18fd54f8c27e4b2aac222e75ac58aa85e5a7988 (diff)
downloadcpython-7c26472d09548905d8c158b26b6a2b12de6cdc32.zip
cpython-7c26472d09548905d8c158b26b6a2b12de6cdc32.tar.gz
cpython-7c26472d09548905d8c158b26b6a2b12de6cdc32.tar.bz2
bpo-46504: faster code for trial quotient in x_divrem() (GH-30856)
* bpo-46504: faster code for trial quotient in x_divrem(). This brings x_divrem() back into sync with x_divrem1(), which was changed in bpo-46406 to generate faster code to find machine-word division quotients and remainders. Modern processors compute both with a single machine instruction, but convincing C to exploit that requires writing _less_ "clever" C code.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/longobject.c 9
1 file changed, 8 insertions, 1 deletion
diff --git a/Objects/longobject.c b/Objects/longobject.c
index ee20e26..5f0cc57 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -2767,8 +2767,15 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
vtop = vk[size_w];
assert(vtop <= wm1);
vv = ((twodigits)vtop << PyLong_SHIFT) | vk[size_w-1];
+ /* The code used to compute the remainder via
+ * r = (digit)(vv - (twodigits)wm1 * q);
+ * and compilers generally generated code to do the * and -.
+ * But modern processors generally compute q and r with a single
+ * instruction, and modern optimizing compilers exploit that if we
+ * _don't_ try to optimize it.
+ */
q = (digit)(vv / wm1);
- r = (digit)(vv - (twodigits)wm1 * q); /* r = vv % wm1 */
+ r = (digit)(vv % wm1);
while ((twodigits)wm2 * q > (((twodigits)r << PyLong_SHIFT)
| vk[size_w-2])) {
--q;