summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst1
-rw-r--r--Objects/longobject.c115
3 files changed, 108 insertions, 9 deletions
diff --git a/Misc/ACKS b/Misc/ACKS
index cf023c9..6df339c 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1860,6 +1860,7 @@ Kurt Vile
Norman Vine
Pauli Virtanen
Frank Visser
+Jeremiah Vivian (Pascual)
Johannes Vogel
Michael Vogt
Radu Voicilas
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst
new file mode 100644
index 0000000..e7f555f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst
@@ -0,0 +1 @@
+Optimize some modulo operations in ``Objects/longobject.c``. Patch by Jeremiah Vivian.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 5f0cc57..cc4ceec 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1670,6 +1670,35 @@ divrem1(PyLongObject *a, digit n, digit *prem)
return long_normalize(z);
}
+/* Remainder of long pin, w/ size digits, by non-zero digit n,
+ returning the remainder. pin points at the LSD. */
+
+static digit
+inplace_rem1(digit *pin, Py_ssize_t size, digit n)
+{
+ twodigits rem = 0;
+
+ assert(n > 0 && n <= PyLong_MASK);
+ while (--size >= 0)
+ rem = ((rem << PyLong_SHIFT) | pin[size]) % n;
+ return (digit)rem;
+}
+
+/* Get the remainder of an integer divided by a digit, returning
+ the remainder as the result of the function. The sign of a is
+ ignored; n should not be zero. */
+
+static PyLongObject *
+rem1(PyLongObject *a, digit n)
+{
+ const Py_ssize_t size = Py_ABS(Py_SIZE(a));
+
+ assert(n > 0 && n <= PyLong_MASK);
+ return (PyLongObject *)PyLong_FromLong(
+ (long)inplace_rem1(a->ob_digit, size, n)
+ );
+}
+
/* Convert an integer to a base 10 string. Returns a new non-shared
string. (Return value is non-shared so that callers can modify the
returned value if necessary.) */
@@ -2689,6 +2718,47 @@ long_divrem(PyLongObject *a, PyLongObject *b,
return 0;
}
+/* Int remainder, top-level routine */
+
+static int
+long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
+{
+ Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b));
+
+ if (size_b == 0) {
+ PyErr_SetString(PyExc_ZeroDivisionError,
+ "integer modulo by zero");
+ return -1;
+ }
+ if (size_a < size_b ||
+ (size_a == size_b &&
+ a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) {
+ /* |a| < |b|. */
+ *prem = (PyLongObject *)long_long((PyObject *)a);
+ return -(*prem == NULL);
+ }
+ if (size_b == 1) {
+ *prem = rem1(a, b->ob_digit[0]);
+ if (*prem == NULL)
+ return -1;
+ }
+ else {
+ /* Slow path using divrem. */
+ x_divrem(a, b, prem);
+ if (*prem == NULL)
+ return -1;
+ }
+ /* Set the sign. */
+ if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) {
+ _PyLong_Negate(prem);
+ if (*prem == NULL) {
+ Py_CLEAR(*prem);
+ return -1;
+ }
+ }
+ return 0;
+}
+
/* Unsigned int division with remainder -- the algorithm. The arguments v1
and w1 should satisfy 2 <= Py_ABS(Py_SIZE(w1)) <= Py_ABS(Py_SIZE(v1)). */
@@ -3814,6 +3884,37 @@ l_divmod(PyLongObject *v, PyLongObject *w,
return 0;
}
+/* Compute
+ * *pmod = v % w
+ * pmod cannot be NULL. The caller owns a reference to pmod.
+ */
+static int
+l_mod(PyLongObject *v, PyLongObject *w, PyLongObject **pmod)
+{
+ PyLongObject *mod;
+
+ assert(pmod);
+ if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) {
+ /* Fast path for single-digit longs */
+ *pmod = (PyLongObject *)fast_mod(v, w);
+ return -(*pmod == NULL);
+ }
+ if (long_rem(v, w, &mod) < 0)
+ return -1;
+ if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) ||
+ (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) {
+ PyLongObject *temp;
+ temp = (PyLongObject *) long_add(mod, w);
+ Py_DECREF(mod);
+ mod = temp;
+ if (mod == NULL)
+ return -1;
+ }
+ *pmod = mod;
+
+ return 0;
+}
+
static PyObject *
long_div(PyObject *a, PyObject *b)
{
@@ -4100,11 +4201,7 @@ long_mod(PyObject *a, PyObject *b)
CHECK_BINOP(a, b);
- if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
- return fast_mod((PyLongObject*)a, (PyLongObject*)b);
- }
-
- if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
+ if (l_mod((PyLongObject*)a, (PyLongObject*)b, &mod) < 0)
mod = NULL;
return (PyObject *)mod;
}
@@ -4333,10 +4430,10 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
while the "large exponent" case multiplies directly by base 31
times. It can be unboundedly faster to multiply by
base % modulus instead.
- We could _always_ do this reduction, but l_divmod() isn't cheap,
+ We could _always_ do this reduction, but l_mod() isn't cheap,
so we only do it when it buys something. */
if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) {
- if (l_divmod(a, c, NULL, &temp) < 0)
+ if (l_mod(a, c, &temp) < 0)
goto Error;
Py_DECREF(a);
a = temp;
@@ -4357,7 +4454,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
#define REDUCE(X) \
do { \
if (c != NULL) { \
- if (l_divmod(X, c, NULL, &temp) < 0) \
+ if (l_mod(X, c, &temp) < 0) \
goto Error; \
Py_XDECREF(X); \
X = temp; \
@@ -5022,7 +5119,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
if (k == 0) {
/* no progress; do a Euclidean step */
- if (l_divmod(a, b, NULL, &r) < 0)
+ if (l_mod(a, b, &r) < 0)
goto error;
Py_DECREF(a);
a = b;