From 827b35c9fed9e842c96b41198b26cee96a3e679b Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 10 Dec 2007 22:19:17 +0000 Subject: Issue #1580: New free format floating point representation based on "Floating-Point Printer Sample Code", by Robert G. Burger. For example repr(11./5) now returns '2.2' instead of '2.2000000000000002'. Thanks to noam for the patch! I had to modify doubledigits.c slightly to support X64 and IA64 machines on Windows. I also added the new file to the three project files. --- Include/floatobject.h | 4 + Lib/test/test_float.py | 17 +- Makefile.pre.in | 1 + Misc/NEWS | 4 + Objects/doubledigits.c | 601 ++++++++++++++++++++++++++++++++++ Objects/floatobject.c | 108 +++++- PCbuild/pythoncore.vcproj | 3 + PCbuild8/pythoncore/pythoncore.vcproj | 4 + PCbuild9/pythoncore.vcproj | 4 + 9 files changed, 743 insertions(+), 3 deletions(-) create mode 100644 Objects/doubledigits.c diff --git a/Include/floatobject.h b/Include/floatobject.h index 3ec5af5..b9c79c7 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -76,6 +76,10 @@ PyAPI_FUNC(int) _PyFloat_Pack8(double x, unsigned char *p, int le); */ PyAPI_FUNC(int) _PyFloat_Repr(double x, char *p, size_t len); +/* Used to get the important decimal digits of a double */ +PyAPI_FUNC(int) _PyFloat_Digits(char *buf, double v, int *signum); +PyAPI_FUNC(void) _PyFloat_DigitsInit(void); + /* The unpack routines read 4 or 8 bytes, starting at p. le is a bool * argument, true if the string is in little-endian format (exponent * last, at p+3 or p+7), false if big-endian (exponent first, at p). diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 4360c54..2ba6dbc 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -1,5 +1,6 @@ import unittest, struct +import os from test import test_support class FormatFunctionsTestCase(unittest.TestCase): @@ -146,12 +147,26 @@ class FormatTestCase(unittest.TestCase): self.assertRaises(ValueError, format, 3.0, "s") +class ReprTestCase(unittest.TestCase): + def test_repr(self): + floats_file = open(os.path.join(os.path.split(__file__)[0], + 'floating_points.txt')) + for line in floats_file: + line = line.strip() + if not line or line.startswith('#'): + continue + v = eval(line) + self.assertEqual(v, eval(repr(v))) + floats_file.close() + + def test_main(): test_support.run_unittest( FormatFunctionsTestCase, UnknownFormatTestCase, IEEEFormatTestCase, - FormatTestCase) + FormatTestCase, + ReprTestCase) if __name__ == '__main__': test_main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 3cf41a6..8197b80 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -301,6 +301,7 @@ OBJECT_OBJS= \ Objects/genobject.o \ Objects/fileobject.o \ Objects/floatobject.o \ + Objects/doubledigits.o \ Objects/frameobject.o \ Objects/funcobject.o \ Objects/iterobject.o \ diff --git a/Misc/NEWS b/Misc/NEWS index 3e3f0e4..61e150c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.0a3? Core and Builtins ----------------- +- Issue #1580: New free format floating point representation based on + "Floating-Point Printer Sample Code", by Robert G. Burger. For example + repr(11./5) now returns '2.2' instead of '2.2000000000000002'. + - Issue #1573: Improper use of the keyword-only syntax makes the parser crash - Issue #1564: The set implementation should special-case PyUnicode instead diff --git a/Objects/doubledigits.c b/Objects/doubledigits.c new file mode 100644 index 0000000..1f1c91c --- /dev/null +++ b/Objects/doubledigits.c @@ -0,0 +1,601 @@ +/* Free-format floating point printer + * + * Based on "Floating-Point Printer Sample Code", by Robert G. Burger, + * http://www.cs.indiana.edu/~burger/fp/index.html + */ + +#include "Python.h" + +#if defined(__alpha) || defined(__i386) || defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64) +#define LITTLE_ENDIAN_IEEE_DOUBLE +#elif !(defined(__ppc__) || defined(sparc) || defined(__sgi) || defined(_IBMR2) || defined(hpux)) +#error unknown machine type +#endif + +#if defined(_M_IX86) +#define UNSIGNED64 unsigned __int64 +#elif defined(__alpha) +#define UNSIGNED64 unsigned long +#else +#define UNSIGNED64 unsigned long long +#endif + +#ifndef U32 +#define U32 unsigned int +#endif + +/* exponent stored + 1024, hidden bit to left of decimal point */ +#define bias 1023 +#define bitstoright 52 +#define m1mask 0xf +#define hidden_bit 0x100000 +#ifdef LITTLE_ENDIAN_IEEE_DOUBLE +struct dblflt { + unsigned int m4: 16; + unsigned int m3: 16; + unsigned int m2: 16; + unsigned int m1: 4; + unsigned int e: 11; + unsigned int s: 1; +}; +#else +/* Big Endian IEEE Double Floats */ +struct dblflt { + unsigned int s: 1; + unsigned int e: 11; + unsigned int m1: 4; + unsigned int m2: 16; + unsigned int m3: 16; + unsigned int m4: 16; +}; +#endif +#define float_radix 2.147483648e9 + + +typedef UNSIGNED64 Bigit; +#define BIGSIZE 24 +#define MIN_E -1074 +#define MAX_FIVE 325 +#define B_P1 ((Bigit)1 << 52) + +typedef struct { + int l; + Bigit d[BIGSIZE]; +} Bignum; + +static Bignum R, S, MP, MM, five[MAX_FIVE]; +static Bignum S2, S3, S4, S5, S6, S7, S8, S9; +static int ruf, k, s_n, use_mp, qr_shift, sl, slr; + +static void mul10(Bignum *x); +static void big_short_mul(Bignum *x, Bigit y, Bignum *z); +/* +static void print_big(Bignum *x); +*/ +static int estimate(int n); +static void one_shift_left(int y, Bignum *z); +static void short_shift_left(Bigit x, int y, Bignum *z); +static void big_shift_left(Bignum *x, int y, Bignum *z); +static int big_comp(Bignum *x, Bignum *y); +static int sub_big(Bignum *x, Bignum *y, Bignum *z); +static void add_big(Bignum *x, Bignum *y, Bignum *z); +static int add_cmp(void); +static int qr(void); + +/*static int _PyFloat_Digits(char *buf, double v, int *signum);*/ +/*static void _PyFloat_DigitsInit(void);*/ + +#define ADD(x, y, z, k) {\ + Bigit x_add, z_add;\ + x_add = (x);\ + if ((k))\ + z_add = x_add + (y) + 1, (k) = (z_add <= x_add);\ + else\ + z_add = x_add + (y), (k) = (z_add < x_add);\ + (z) = z_add;\ +} + +#define SUB(x, y, z, b) {\ + Bigit x_sub, y_sub;\ + x_sub = (x); y_sub = (y);\ + if ((b))\ + (z) = x_sub - y_sub - 1, b = (y_sub >= x_sub);\ + else\ + (z) = x_sub - y_sub, b = (y_sub > x_sub);\ +} + +#define MUL(x, y, z, k) {\ + Bigit x_mul, low, high;\ + x_mul = (x);\ + low = (x_mul & 0xffffffff) * (y) + (k);\ + high = (x_mul >> 32) * (y) + (low >> 32);\ + (k) = high >> 32;\ + (z) = (low & 0xffffffff) | (high << 32);\ +} + +#define SLL(x, y, z, k) {\ + Bigit x_sll = (x);\ + (z) = (x_sll << (y)) | (k);\ + (k) = x_sll >> (64 - (y));\ +} + +static void +mul10(Bignum *x) +{ + int i, l; + Bigit *p, k; + + l = x->l; + for (i = l, p = &x->d[0], k = 0; i >= 0; i--) + MUL(*p, 10, *p++, k); + if (k != 0) + *p = k, x->l = l+1; +} + +static void +big_short_mul(Bignum *x, Bigit y, Bignum *z) +{ + int i, xl, zl; + Bigit *xp, *zp, k; + U32 high, low; + + xl = x->l; + xp = &x->d[0]; + zl = xl; + zp = &z->d[0]; + high = y >> 32; + low = y & 0xffffffff; + for (i = xl, k = 0; i >= 0; i--, xp++, zp++) { + Bigit xlow, xhigh, z0, t, c, z1; + xlow = *xp & 0xffffffff; + xhigh = *xp >> 32; + z0 = (xlow * low) + k; /* Cout is (z0 < k) */ + t = xhigh * low; + z1 = (xlow * high) + t; + c = (z1 < t); + t = z0 >> 32; + z1 += t; + c += (z1 < t); + *zp = (z1 << 32) | (z0 & 0xffffffff); + k = (xhigh * high) + (c << 32) + (z1 >> 32) + (z0 < k); + } + if (k != 0) + *zp = k, zl++; + z->l = zl; +} + +/* +static void +print_big(Bignum *x) +{ + int i; + Bigit *p; + + printf("#x"); + i = x->l; + p = &x->d[i]; + for (p = &x->d[i]; i >= 0; i--) { + Bigit b = *p--; + printf("%08x%08x", (int)(b >> 32), (int)(b & 0xffffffff)); + } +} +*/ + +static int +estimate(int n) +{ + if (n < 0) + return (int)(n*0.3010299956639812); + else + return 1+(int)(n*0.3010299956639811); +} + +static void +one_shift_left(int y, Bignum *z) +{ + int n, m, i; + Bigit *zp; + + n = y / 64; + m = y % 64; + zp = &z->d[0]; + for (i = n; i > 0; i--) *zp++ = 0; + *zp = (Bigit)1 << m; + z->l = n; +} + +static void +short_shift_left(Bigit x, int y, Bignum *z) +{ + int n, m, i, zl; + Bigit *zp; + + n = y / 64; + m = y % 64; + zl = n; + zp = &(z->d[0]); + for (i = n; i > 0; i--) *zp++ = 0; + if (m == 0) + *zp = x; + else { + Bigit high = x >> (64 - m); + *zp = x << m; + if (high != 0) + *++zp = high, zl++; + } + z->l = zl; +} + +static void +big_shift_left(Bignum *x, int y, Bignum *z) +{ + int n, m, i, xl, zl; + Bigit *xp, *zp, k; + + n = y / 64; + m = y % 64; + xl = x->l; + xp = &(x->d[0]); + zl = xl + n; + zp = &(z->d[0]); + for (i = n; i > 0; i--) *zp++ = 0; + if (m == 0) + for (i = xl; i >= 0; i--) *zp++ = *xp++; + else { + for (i = xl, k = 0; i >= 0; i--) + SLL(*xp++, m, *zp++, k); + if (k != 0) + *zp = k, zl++; + } + z->l = zl; +} + + +static int +big_comp(Bignum *x, Bignum *y) +{ + int i, xl, yl; + Bigit *xp, *yp; + + xl = x->l; + yl = y->l; + if (xl > yl) return 1; + if (xl < yl) return -1; + xp = &x->d[xl]; + yp = &y->d[xl]; + for (i = xl; i >= 0; i--, xp--, yp--) { + Bigit a = *xp; + Bigit b = *yp; + + if (a > b) return 1; + else if (a < b) return -1; + } + return 0; +} + +static int +sub_big(Bignum *x, Bignum *y, Bignum *z) +{ + int xl, yl, zl, b, i; + Bigit *xp, *yp, *zp; + + xl = x->l; + yl = y->l; + if (yl > xl) return 1; + xp = &x->d[0]; + yp = &y->d[0]; + zp = &z->d[0]; + + for (i = yl, b = 0; i >= 0; i--) + SUB(*xp++, *yp++, *zp++, b); + for (i = xl-yl; b && i > 0; i--) { + Bigit x_sub; + x_sub = *xp++; + *zp++ = x_sub - 1; + b = (x_sub == 0); + } + for (; i > 0; i--) *zp++ = *xp++; + if (b) return 1; + zl = xl; + while (*--zp == 0) zl--; + z->l = zl; + return 0; +} + +static void +add_big(Bignum *x, Bignum *y, Bignum *z) +{ + int xl, yl, k, i; + Bigit *xp, *yp, *zp; + + xl = x->l; + yl = y->l; + if (yl > xl) { + int tl; + Bignum *tn; + tl = xl; xl = yl; yl = tl; + tn = x; x = y; y = tn; + } + + xp = &x->d[0]; + yp = &y->d[0]; + zp = &z->d[0]; + + for (i = yl, k = 0; i >= 0; i--) + ADD(*xp++, *yp++, *zp++, k); + for (i = xl-yl; k && i > 0; i--) { + Bigit z_add; + z_add = *xp++ + 1; + k = (z_add == 0); + *zp++ = z_add; + } + for (; i > 0; i--) *zp++ = *xp++; + if (k) + *zp = 1, z->l = xl+1; + else + z->l = xl; +} + +static int +add_cmp() +{ + int rl, ml, sl, suml; + static Bignum sum; + + rl = R.l; + ml = (use_mp ? MP.l : MM.l); + sl = S.l; + + suml = rl >= ml ? rl : ml; + if ((sl > suml+1) || ((sl == suml+1) && (S.d[sl] > 1))) return -1; + if (sl < suml) return 1; + + add_big(&R, (use_mp ? &MP : &MM), &sum); + return big_comp(&sum, &S); +} + +static int +qr() +{ + if (big_comp(&R, &S5) < 0) + if (big_comp(&R, &S2) < 0) + if (big_comp(&R, &S) < 0) + return 0; + else { + sub_big(&R, &S, &R); + return 1; + } + else if (big_comp(&R, &S3) < 0) { + sub_big(&R, &S2, &R); + return 2; + } + else if (big_comp(&R, &S4) < 0) { + sub_big(&R, &S3, &R); + return 3; + } + else { + sub_big(&R, &S4, &R); + return 4; + } + else if (big_comp(&R, &S7) < 0) + if (big_comp(&R, &S6) < 0) { + sub_big(&R, &S5, &R); + return 5; + } + else { + sub_big(&R, &S6, &R); + return 6; + } + else if (big_comp(&R, &S9) < 0) + if (big_comp(&R, &S8) < 0) { + sub_big(&R, &S7, &R); + return 7; + } + else { + sub_big(&R, &S8, &R); + return 8; + } + else { + sub_big(&R, &S9, &R); + return 9; + } +} + +#define OUTDIG(d) { *buf++ = (d) + '0'; *buf = 0; return k; } + +int +_PyFloat_Digits(char *buf, double v, int *signum) +{ + struct dblflt *x; + int sign, e, f_n, m_n, i, d, tc1, tc2; + Bigit f; + + /* decompose float into sign, mantissa & exponent */ + x = (struct dblflt *)&v; + sign = x->s; + e = x->e; + f = (Bigit)(x->m1 << 16 | x->m2) << 32 | (U32)(x->m3 << 16 | x->m4); + if (e != 0) { + e = e - bias - bitstoright; + f |= (Bigit)hidden_bit << 32; + } + else if (f != 0) + /* denormalized */ + e = 1 - bias - bitstoright; + + *signum = sign; + if (f == 0) { + *buf++ = '0'; + *buf = 0; + return 0; + } + + ruf = !(f & 1); /* ruf = (even? f) */ + + /* Compute the scaling factor estimate, k */ + if (e > MIN_E) + k = estimate(e+52); + else { + int n; + Bigit y; + + for (n = e+52, y = (Bigit)1 << 52; f < y; n--) y >>= 1; + k = estimate(n); + } + + if (e >= 0) + if (f != B_P1) + use_mp = 0, f_n = e+1, s_n = 1, m_n = e; + else + use_mp = 1, f_n = e+2, s_n = 2, m_n = e; + else + if ((e == MIN_E) || (f != B_P1)) + use_mp = 0, f_n = 1, s_n = 1-e, m_n = 0; + else + use_mp = 1, f_n = 2, s_n = 2-e, m_n = 0; + + /* Scale it! */ + if (k == 0) { + short_shift_left(f, f_n, &R); + one_shift_left(s_n, &S); + one_shift_left(m_n, &MM); + if (use_mp) one_shift_left(m_n+1, &MP); + qr_shift = 1; + } + else if (k > 0) { + s_n += k; + if (m_n >= s_n) + f_n -= s_n, m_n -= s_n, s_n = 0; + else + f_n -= m_n, s_n -= m_n, m_n = 0; + short_shift_left(f, f_n, &R); + big_shift_left(&five[k-1], s_n, &S); + one_shift_left(m_n, &MM); + if (use_mp) one_shift_left(m_n+1, &MP); + qr_shift = 0; + } + else { + Bignum *power = &five[-k-1]; + + s_n += k; + big_short_mul(power, f, &S); + big_shift_left(&S, f_n, &R); + one_shift_left(s_n, &S); + big_shift_left(power, m_n, &MM); + if (use_mp) big_shift_left(power, m_n+1, &MP); + qr_shift = 1; + } + + /* fixup */ + if (add_cmp() <= -ruf) { + k--; + mul10(&R); + mul10(&MM); + if (use_mp) mul10(&MP); + } + + /* + printf("\nk = %d\n", k); + printf("R = "); print_big(&R); + printf("\nS = "); print_big(&S); + printf("\nM- = "); print_big(&MM); + if (use_mp) printf("\nM+ = "), print_big(&MP); + putchar('\n'); + fflush(0); + */ + + if (qr_shift) { + sl = s_n / 64; + slr = s_n % 64; + } + else { + big_shift_left(&S, 1, &S2); + add_big(&S2, &S, &S3); + big_shift_left(&S2, 1, &S4); + add_big(&S4, &S, &S5); + add_big(&S4, &S2, &S6); + add_big(&S4, &S3, &S7); + big_shift_left(&S4, 1, &S8); + add_big(&S8, &S, &S9); + } + +again: + if (qr_shift) { /* Take advantage of the fact that S = (ash 1 s_n) */ + if (R.l < sl) + d = 0; + else if (R.l == sl) { + Bigit *p; + + p = &R.d[sl]; + d = *p >> slr; + *p &= ((Bigit)1 << slr) - 1; + for (i = sl; (i > 0) && (*p == 0); i--) p--; + R.l = i; + } + else { + Bigit *p; + + p = &R.d[sl+1]; + d = *p << (64 - slr) | *(p-1) >> slr; + p--; + *p &= ((Bigit)1 << slr) - 1; + for (i = sl; (i > 0) && (*p == 0); i--) p--; + R.l = i; + } + } + else /* We need to do quotient-remainder */ + d = qr(); + + tc1 = big_comp(&R, &MM) < ruf; + tc2 = add_cmp() > -ruf; + if (!tc1) + if (!tc2) { + mul10(&R); + mul10(&MM); + if (use_mp) mul10(&MP); + *buf++ = d + '0'; + goto again; + } + else + OUTDIG(d+1) + else + if (!tc2) + OUTDIG(d) + else { + big_shift_left(&R, 1, &MM); + if (big_comp(&MM, &S) == -1) + OUTDIG(d) + else + OUTDIG(d+1) + } +} + +void +_PyFloat_DigitsInit() +{ + int n, i, l; + Bignum *b; + Bigit *xp, *zp, k; + + five[0].l = l = 0; + five[0].d[0] = 5; + for (n = MAX_FIVE-1, b = &five[0]; n > 0; n--) { + xp = &b->d[0]; + b++; + zp = &b->d[0]; + for (i = l, k = 0; i >= 0; i--) + MUL(*xp++, 5, *zp++, k); + if (k != 0) + *zp = k, l++; + b->l = l; + } + + /* + for (n = 1, b = &five[0]; n <= MAX_FIVE; n++) { + big_shift_left(b++, n, &R); + print_big(&R); + putchar('\n'); + } + fflush(0); + */ +} diff --git a/Objects/floatobject.c b/Objects/floatobject.c index d346ef6..b17f7be 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -281,6 +281,107 @@ format_float(char *buf, size_t buflen, PyFloatObject *v, int precision) format_double(buf, buflen, PyFloat_AS_DOUBLE(v), precision); } +/* The following function is based on Tcl_PrintDouble, + * from tclUtil.c. + */ + +#define is_infinite(d) ( (d) > DBL_MAX || (d) < -DBL_MAX ) +#define is_nan(d) ((d) != (d)) + +static void +format_double_repr(char *dst, double value) +{ + char *p, c; + int exp; + int signum; + char buffer[30]; + + /* + * Handle NaN. + */ + + if (is_nan(value)) { + strcpy(dst, "nan"); + return; + } + + /* + * Handle infinities. + */ + + if (is_infinite(value)) { + if (value < 0) { + strcpy(dst, "-inf"); + } else { + strcpy(dst, "inf"); + } + return; + } + + /* + * Ordinary (normal and denormal) values. + */ + + exp = _PyFloat_Digits(buffer, value, &signum)+1; + if (signum) { + *dst++ = '-'; + } + p = buffer; + if (exp < -3 || exp > 17) { + /* + * E format for numbers < 1e-3 or >= 1e17. + */ + + *dst++ = *p++; + c = *p; + if (c != '\0') { + *dst++ = '.'; + while (c != '\0') { + *dst++ = c; + c = *++p; + } + } + sprintf(dst, "e%+d", exp-1); + } else { + /* + * F format for others. + */ + + if (exp <= 0) { + *dst++ = '0'; + } + c = *p; + while (exp-- > 0) { + if (c != '\0') { + *dst++ = c; + c = *++p; + } else { + *dst++ = '0'; + } + } + *dst++ = '.'; + if (c == '\0') { + *dst++ = '0'; + } else { + while (++exp < 0) { + *dst++ = '0'; + } + while (c != '\0') { + *dst++ = c; + c = *++p; + } + } + *dst++ = '\0'; + } +} + +static void +format_float_repr(char *buf, PyFloatObject *v) +{ + assert(PyFloat_Check(v)); + format_double_repr(buf, PyFloat_AS_DOUBLE(v)); +} + /* Macro and helper that convert PyObject obj to a C double and store the value in dbl. If conversion to double raises an exception, obj is set to NULL, and the function invoking this macro returns NULL. If @@ -333,8 +434,8 @@ convert_to_double(PyObject **v, double *dbl) static PyObject * float_repr(PyFloatObject *v) { - char buf[100]; - format_float(buf, sizeof(buf), v, PREC_REPR); + char buf[30]; + format_float_repr(buf, v); return PyUnicode_FromString(buf); } @@ -1226,6 +1327,9 @@ _PyFloat_Init(void) double_format = detected_double_format; float_format = detected_float_format; + + /* Initialize floating point repr */ + _PyFloat_DigitsInit(); } void diff --git a/PCbuild/pythoncore.vcproj b/PCbuild/pythoncore.vcproj index 19cd833..326eacc 100644 --- a/PCbuild/pythoncore.vcproj +++ b/PCbuild/pythoncore.vcproj @@ -488,6 +488,9 @@ RelativePath="..\Objects\dictobject.c"> + + + + diff --git a/PCbuild9/pythoncore.vcproj b/PCbuild9/pythoncore.vcproj index e41688c..646cb4f 100644 --- a/PCbuild9/pythoncore.vcproj +++ b/PCbuild9/pythoncore.vcproj @@ -1363,6 +1363,10 @@ > + + -- cgit v0.12