diff options
-rw-r--r-- | Doc/library/math.rst | 5 | ||||
-rw-r--r-- | Lib/test/test_math.py | 51 | ||||
-rw-r--r-- | Modules/mathmodule.c | 14 |
3 files changed, 63 insertions, 7 deletions
diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 6ec1fee..bbf6464 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -481,6 +481,11 @@ Trigonometric functions Added support for n-dimensional points. Formerly, only the two dimensional case was supported. + .. versionchanged:: 3.10 + Improved the algorithm's accuracy so that the maximum error is + under 1 ulp (unit in the last place). More typically, the result + is almost always correctly rounded to within 1/2 ulp. + .. function:: sin(x) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 4d62eb1..bbaa533 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -803,6 +803,57 @@ class MathTests(unittest.TestCase): scale = FLOAT_MIN / 2.0 ** exp self.assertEqual(math.hypot(4*scale, 3*scale), 5*scale) + def testHypotAccuracy(self): + # Verify improved accuracy in cases that were known to be inaccurate. + + hypot = math.hypot + Decimal = decimal.Decimal + high_precision = decimal.Context(prec=500) + + for hx, hy in [ + # Cases with a 1 ulp error in Python 3.7 compiled with Clang + ('0x1.10e89518dca48p+29', '0x1.1970f7565b7efp+30'), + ('0x1.10106eb4b44a2p+29', '0x1.ef0596cdc97f8p+29'), + ('0x1.459c058e20bb7p+30', '0x1.993ca009b9178p+29'), + ('0x1.378371ae67c0cp+30', '0x1.fbe6619854b4cp+29'), + ('0x1.f4cd0574fb97ap+29', '0x1.50fe31669340ep+30'), + ('0x1.494b2cdd3d446p+29', '0x1.212a5367b4c7cp+29'), + ('0x1.f84e649f1e46dp+29', '0x1.1fa56bef8eec4p+30'), + ('0x1.2e817edd3d6fap+30', '0x1.eb0814f1e9602p+29'), + ('0x1.0d3a6e3d04245p+29', '0x1.32a62fea52352p+30'), + ('0x1.888e19611bfc5p+29', '0x1.52b8e70b24353p+29'), + + # Cases with 2 ulp error in Python 3.8 + ('0x1.538816d48a13fp+29', '0x1.7967c5ca43e16p+29'), + ('0x1.57b47b7234530p+29', '0x1.74e2c7040e772p+29'), + ('0x1.821b685e9b168p+30', '0x1.677dc1c1e3dc6p+29'), + ('0x1.9e8247f67097bp+29', '0x1.24bd2dc4f4baep+29'), + ('0x1.b73b59e0cb5f9p+29', '0x1.da899ab784a97p+28'), + ('0x1.94a8d2842a7cfp+30', '0x1.326a51d4d8d8ap+30'), + ('0x1.e930b9cd99035p+29', '0x1.5a1030e18dff9p+30'), + ('0x1.1592bbb0e4690p+29', '0x1.a9c337b33fb9ap+29'), + ('0x1.1243a50751fd4p+29', '0x1.a5a10175622d9p+29'), + ('0x1.57a8596e74722p+30', '0x1.42d1af9d04da9p+30'), + + # Cases with 1 ulp error in version fff3c28052e6b0750d6218e00acacd2fded4991a + ('0x1.ee7dbd9565899p+29', '0x1.7ab4d6fc6e4b4p+29'), + ('0x1.5c6bfbec5c4dcp+30', '0x1.02511184b4970p+30'), + ('0x1.59dcebba995cap+30', '0x1.50ca7e7c38854p+29'), + ('0x1.768cdd94cf5aap+29', '0x1.9cfdc5571d38ep+29'), + ('0x1.dcf137d60262ep+29', '0x1.1101621990b3ep+30'), + ('0x1.3a2d006e288b0p+30', '0x1.e9a240914326cp+29'), + ('0x1.62a32f7f53c61p+29', '0x1.47eb6cd72684fp+29'), + ('0x1.d3bcb60748ef2p+29', '0x1.3f13c4056312cp+30'), + ('0x1.282bdb82f17f3p+30', '0x1.640ba4c4eed3ap+30'), + ('0x1.89d8c423ea0c6p+29', '0x1.d35dcfe902bc3p+29'), + ]: + with self.subTest(hx=hx, hy=hy): + x = float.fromhex(hx) + y = float.fromhex(hy) + with decimal.localcontext(high_precision): + z = float((Decimal(x)**2 + Decimal(y)**2).sqrt()) + self.assertEqual(hypot(x, y), z) + def testDist(self): from decimal import Decimal as D from fractions import Fraction as F diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 29137ae..ecd291e 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2429,7 +2429,7 @@ magnitude. We avoid this cost by arranging the calculation so that fabs(csum) is always as large as fabs(x). To establish the invariant, *csum* is initialized to 1.0 which is -always larger than x**2 after scaling or division by *max*. +always larger than x**2 after scaling or after division by *max*. After the loop is finished, the initial 1.0 is subtracted out for a net zero effect on the final sum. Since *csum* will be greater than 1.0, the subtraction of 1.0 will not cause fractional digits to be @@ -2458,7 +2458,7 @@ Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum. To minimize loss of information during the accumulation of fractional values, each term has a separate accumulator. This also breaks up sequential dependencies in the inner loop so the CPU can maximize -floating point throughput. [5] On a 2.6 GHz Haswell, adding one +floating point throughput. [4] On a 2.6 GHz Haswell, adding one dimension has an incremental cost of only 5ns -- for example when moving from hypot(x,y) to hypot(x,y,z). @@ -2470,7 +2470,7 @@ The differential correction starts with a value *x* that is the difference between the square of *h*, the possibly inaccurately rounded square root, and the accurately computed sum of squares. The correction is the first order term of the Maclaurin series -expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [4] +expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5] Essentially, this differential correction is equivalent to one refinement step in Newton's divide-and-average square root @@ -2492,10 +2492,10 @@ References: 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf 2. Compensated summation: http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf 3. Square root differential correction: https://arxiv.org/pdf/1904.09481.pdf -4. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 -5. https://bugs.python.org/file49439/hypot.png -6. https://bugs.python.org/file49435/best_frac.py -7. https://bugs.python.org/file49448/test_hypot_commutativity.py +4. Data dependency graph: https://bugs.python.org/file49439/hypot.png +5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 +6. Analysis of internal accuracy: https://bugs.python.org/file49435/best_frac.py +7. Commutativity test: https://bugs.python.org/file49448/test_hypot_commutativity.py */ |