summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/library/math.rst 5
-rw-r--r-- Lib/test/test_math.py 51
-rw-r--r-- Modules/mathmodule.c 14
3 files changed, 63 insertions, 7 deletions
diff --git a/Doc/library/math.rst b/Doc/library/math.rst
index 6ec1fee..bbf6464 100644
--- a/Doc/library/math.rst
+++ b/Doc/library/math.rst
@@ -481,6 +481,11 @@ Trigonometric functions
Added support for n-dimensional points. Formerly, only the two
dimensional case was supported.
+ .. versionchanged:: 3.10
+ Improved the algorithm's accuracy so that the maximum error is
+ under 1 ulp (unit in the last place). In practice, the result
+ is almost always correctly rounded to within 1/2 ulp.
+
.. function:: sin(x)
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py
index 4d62eb1..bbaa533 100644
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -803,6 +803,57 @@ class MathTests(unittest.TestCase):
scale = FLOAT_MIN / 2.0 ** exp
self.assertEqual(math.hypot(4*scale, 3*scale), 5*scale)
+ def testHypotAccuracy(self):
+ # Verify improved accuracy in cases that were known to be inaccurate.
+
+ hypot = math.hypot
+ Decimal = decimal.Decimal
+ high_precision = decimal.Context(prec=500)
+
+ for hx, hy in [
+ # Cases with a 1 ulp error in Python 3.7 compiled with Clang
+ ('0x1.10e89518dca48p+29', '0x1.1970f7565b7efp+30'),
+ ('0x1.10106eb4b44a2p+29', '0x1.ef0596cdc97f8p+29'),
+ ('0x1.459c058e20bb7p+30', '0x1.993ca009b9178p+29'),
+ ('0x1.378371ae67c0cp+30', '0x1.fbe6619854b4cp+29'),
+ ('0x1.f4cd0574fb97ap+29', '0x1.50fe31669340ep+30'),
+ ('0x1.494b2cdd3d446p+29', '0x1.212a5367b4c7cp+29'),
+ ('0x1.f84e649f1e46dp+29', '0x1.1fa56bef8eec4p+30'),
+ ('0x1.2e817edd3d6fap+30', '0x1.eb0814f1e9602p+29'),
+ ('0x1.0d3a6e3d04245p+29', '0x1.32a62fea52352p+30'),
+ ('0x1.888e19611bfc5p+29', '0x1.52b8e70b24353p+29'),
+
+ # Cases with 2 ulp error in Python 3.8
+ ('0x1.538816d48a13fp+29', '0x1.7967c5ca43e16p+29'),
+ ('0x1.57b47b7234530p+29', '0x1.74e2c7040e772p+29'),
+ ('0x1.821b685e9b168p+30', '0x1.677dc1c1e3dc6p+29'),
+ ('0x1.9e8247f67097bp+29', '0x1.24bd2dc4f4baep+29'),
+ ('0x1.b73b59e0cb5f9p+29', '0x1.da899ab784a97p+28'),
+ ('0x1.94a8d2842a7cfp+30', '0x1.326a51d4d8d8ap+30'),
+ ('0x1.e930b9cd99035p+29', '0x1.5a1030e18dff9p+30'),
+ ('0x1.1592bbb0e4690p+29', '0x1.a9c337b33fb9ap+29'),
+ ('0x1.1243a50751fd4p+29', '0x1.a5a10175622d9p+29'),
+ ('0x1.57a8596e74722p+30', '0x1.42d1af9d04da9p+30'),
+
+ # Cases with 1 ulp error in version fff3c28052e6b0750d6218e00acacd2fded4991a
+ ('0x1.ee7dbd9565899p+29', '0x1.7ab4d6fc6e4b4p+29'),
+ ('0x1.5c6bfbec5c4dcp+30', '0x1.02511184b4970p+30'),
+ ('0x1.59dcebba995cap+30', '0x1.50ca7e7c38854p+29'),
+ ('0x1.768cdd94cf5aap+29', '0x1.9cfdc5571d38ep+29'),
+ ('0x1.dcf137d60262ep+29', '0x1.1101621990b3ep+30'),
+ ('0x1.3a2d006e288b0p+30', '0x1.e9a240914326cp+29'),
+ ('0x1.62a32f7f53c61p+29', '0x1.47eb6cd72684fp+29'),
+ ('0x1.d3bcb60748ef2p+29', '0x1.3f13c4056312cp+30'),
+ ('0x1.282bdb82f17f3p+30', '0x1.640ba4c4eed3ap+30'),
+ ('0x1.89d8c423ea0c6p+29', '0x1.d35dcfe902bc3p+29'),
+ ]:
+ with self.subTest(hx=hx, hy=hy):
+ x = float.fromhex(hx)
+ y = float.fromhex(hy)
+ with decimal.localcontext(high_precision):
+ z = float((Decimal(x)**2 + Decimal(y)**2).sqrt())
+ self.assertEqual(hypot(x, y), z)
+
def testDist(self):
from decimal import Decimal as D
from fractions import Fraction as F
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c
index 29137ae..ecd291e 100644
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2429,7 +2429,7 @@ magnitude. We avoid this cost by arranging the calculation so that
fabs(csum) is always as large as fabs(x).
To establish the invariant, *csum* is initialized to 1.0 which is
-always larger than x**2 after scaling or division by *max*.
+always larger than x**2 after scaling or after division by *max*.
After the loop is finished, the initial 1.0 is subtracted out for a
net zero effect on the final sum. Since *csum* will be greater than
1.0, the subtraction of 1.0 will not cause fractional digits to be
@@ -2458,7 +2458,7 @@ Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
To minimize loss of information during the accumulation of fractional
values, each term has a separate accumulator. This also breaks up
sequential dependencies in the inner loop so the CPU can maximize
-floating point throughput. [5] On a 2.6 GHz Haswell, adding one
+floating point throughput. [4] On a 2.6 GHz Haswell, adding one
dimension has an incremental cost of only 5ns -- for example when
moving from hypot(x,y) to hypot(x,y,z).
@@ -2470,7 +2470,7 @@ The differential correction starts with a value *x* that is
the difference between the square of *h*, the possibly inaccurately
rounded square root, and the accurately computed sum of squares.
The correction is the first order term of the Maclaurin series
-expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [4]
+expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
Essentially, this differential correction is equivalent to one
refinement step in Newton's divide-and-average square root
@@ -2492,10 +2492,10 @@ References:
1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf
-4. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
-5. https://bugs.python.org/file49439/hypot.png
-6. https://bugs.python.org/file49435/best_frac.py
-7. https://bugs.python.org/file49448/test_hypot_commutativity.py
+4. Data dependency graph: https://bugs.python.org/file49439/hypot.png
+5. Maclaurin series expansion: https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
+6. Analysis of internal accuracy: https://bugs.python.org/file49435/best_frac.py
+7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py
*/