diff options
author | Eric Smith <eric@trueblade.com> | 2008-02-20 23:34:22 (GMT) |
---|---|---|
committer | Eric Smith <eric@trueblade.com> | 2008-02-20 23:34:22 (GMT) |
commit | 7ef40bf9c77e0567e4fad656390b0a577f0e2080 (patch) | |
tree | d2357f8d16f0a53ff7c23c76ddd7f73768f6caaf | |
parent | e1b8e9c666335d6a392d32b5900d419019ca3936 (diff) | |
download | cpython-7ef40bf9c77e0567e4fad656390b0a577f0e2080.zip cpython-7ef40bf9c77e0567e4fad656390b0a577f0e2080.tar.gz cpython-7ef40bf9c77e0567e4fad656390b0a577f0e2080.tar.bz2 |
Trim leading zeros from a floating point exponent, per C99. See issue 1600. As far as I know, this only affects Windows. Add float type 'n' to PyOS_ascii_formatd (see PEP 3101 for 'n' description).
-rw-r--r-- | Lib/test/test_types.py | 38 | ||||
-rw-r--r-- | Misc/NEWS | 7 | ||||
-rw-r--r-- | Python/pystrtod.c | 154 |
3 files changed, 165 insertions, 34 deletions
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index fe2950a..bf218db 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -89,6 +89,29 @@ class TypesTests(unittest.TestCase): if float(1) == 1.0 and float(-1) == -1.0 and float(0) == 0.0: pass else: self.fail('float() does not work properly') + def test_float_to_string(self): + def test(f, result): + self.assertEqual(f.__format__('e'), result) + self.assertEqual('%e' % f, result) + + # test all 2 digit exponents, both with __format__ and with + # '%' formatting + for i in range(-99, 100): + test(float('1.5e'+str(i)), '1.500000e{0:+03d}'.format(i)) + + # test some 3 digit exponents + self.assertEqual(1.5e100.__format__('e'), '1.500000e+100') + self.assertEqual('%e' % 1.5e100, '1.500000e+100') + + self.assertEqual(1.5e101.__format__('e'), '1.500000e+101') + self.assertEqual('%e' % 1.5e101, '1.500000e+101') + + self.assertEqual(1.5e-100.__format__('e'), '1.500000e-100') + self.assertEqual('%e' % 1.5e-100, '1.500000e-100') + + self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101') + self.assertEqual('%e' % 1.5e-101, '1.500000e-101') + def test_normal_integers(self): # Ensure the first 256 integers are shared a = 256 @@ -486,16 +509,17 @@ class TypesTests(unittest.TestCase): test(-1.0, ' f', '-1.000000') test( 1.0, '+f', '+1.000000') test(-1.0, '+f', '-1.000000') + test(1.1234e90, 'f', '1.1234e+90') + test(1.1234e90, 'F', '1.1234e+90') test(1.1234e200, 'f', '1.1234e+200') test(1.1234e200, 'F', '1.1234e+200') - # temporarily removed. see issue 1600 - # test( 1.0, 'e', '1.000000e+00') - # test(-1.0, 'e', '-1.000000e+00') - # test( 1.0, 'E', '1.000000E+00') - # test(-1.0, 'E', '-1.000000E+00') - # test(1.1234e20, 'e', '1.123400e+20') - # test(1.1234e20, 'E', '1.123400E+20') + test( 1.0, 'e', '1.000000e+00') + test(-1.0, 'e', '-1.000000e+00') + test( 1.0, 'E', '1.000000E+00') + test(-1.0, 'E', '-1.000000E+00') + test(1.1234e20, 'e', '1.123400e+20') + test(1.1234e20, 'E', '1.123400E+20') # % formatting test(-1.0, '%', '-100.000000%') @@ -12,6 +12,13 @@ What's New in Python 2.6 alpha 1? Core and builtins ----------------- +- Issue #1600: Modifed PyOS_ascii_formatd to use at most 2 digit + exponents for exponents with absolute value < 100. Follows C99 + standard. This is a change on Windows, which would use 3 digits. + Also, added 'n' to the formats that PyOS_ascii_formatd understands, + so that any alterations it does to the resulting string will be + available in stringlib/formatter.h (for float.__format__). + - Implemented PEP 3101, Advanced String Formatting. This adds a new builtin format(); a format() method for str and unicode; a __format__() method to object, str, unicode, int, long, float, and diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 14fb84b..16efa9d 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -186,6 +186,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) } +/* From the C99 standard, section 7.19.6: +The exponent always contains at least two digits, and only as many more digits +as necessary to represent the exponent. +*/ +#define MIN_EXPONENT_DIGITS 2 + +/* see FORMATBUFLEN in unicodeobject.c */ +#define FLOAT_FORMATBUFLEN 120 + /** * PyOS_ascii_formatd: * @buffer: A buffer to place the resulting string in @@ -197,8 +206,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) * Converts a #gdouble to a string, using the '.' as * decimal point. To format the number you pass in * a printf()-style format string. Allowed conversion - * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'. + * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'. * + * 'n' is the same as 'g', except it uses the current locale. + * * Return value: The pointer to the buffer with the converted string. **/ char * @@ -207,17 +218,23 @@ PyOS_ascii_formatd(char *buffer, const char *format, double d) { - struct lconv *locale_data; - const char *decimal_point; - size_t decimal_point_len, rest_len; char *p; char format_char; + size_t format_len = strlen(format); + + /* For type 'n', we need to make a copy of the format string, because + we're going to modify 'n' -> 'g', and format is const char*, so we + can't modify it directly. FLOAT_FORMATBUFLEN should be longer than + we ever need this to be. There's an upcoming check to ensure it's + big enough. */ + char tmp_format[FLOAT_FORMATBUFLEN]; /* g_return_val_if_fail (buffer != NULL, NULL); */ /* g_return_val_if_fail (format[0] == '%', NULL); */ /* g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL); */ - format_char = format[strlen(format) - 1]; + /* The last character in the format string must be the format char */ + format_char = format[format_len - 1]; /* g_return_val_if_fail (format_char == 'e' || format_char == 'E' || */ /* format_char == 'f' || format_char == 'F' || */ @@ -227,43 +244,126 @@ PyOS_ascii_formatd(char *buffer, if (format[0] != '%') return NULL; + /* I'm not sure why this test is here. It's ensuring that the format + string after the first character doesn't have a single quote, a + lowercase l, or a percent. This is the reverse of the commented-out + test about 10 lines ago. */ if (strpbrk(format + 1, "'l%")) return NULL; if (!(format_char == 'e' || format_char == 'E' || format_char == 'f' || format_char == 'F' || - format_char == 'g' || format_char == 'G')) + format_char == 'g' || format_char == 'G' || + format_char == 'n')) return NULL; + /* Map 'n' format_char to 'g', by copying the format string and + replacing the final 'n' with a 'g' */ + if (format_char == 'n') { + if (format_len + 1 >= sizeof(tmp_format)) { + /* The format won't fit in our copy. Error out. In + practice, this will never happen and will be detected + by returning NULL */ + return NULL; + } + strcpy(tmp_format, format); + tmp_format[format_len - 1] = 'g'; + format = tmp_format; + } + /* Have PyOS_snprintf do the hard work */ PyOS_snprintf(buffer, buf_len, format, d); - locale_data = localeconv(); - decimal_point = locale_data->decimal_point; - decimal_point_len = strlen(decimal_point); + /* Get the current local, and find the decimal point character (or + string?). Convert that string back to a dot. Do not do this if + using the 'n' (number) format code. */ + if (format_char != 'n') { + struct lconv *locale_data = localeconv(); + const char *decimal_point = locale_data->decimal_point; + size_t decimal_point_len = strlen(decimal_point); + size_t rest_len; - assert(decimal_point_len != 0); + assert(decimal_point_len != 0); - if (decimal_point[0] != '.' || - decimal_point[1] != 0) - { - p = buffer; + if (decimal_point[0] != '.' || decimal_point[1] != 0) { + p = buffer; - if (*p == '+' || *p == '-') - p++; + if (*p == '+' || *p == '-') + p++; - while (isdigit((unsigned char)*p)) - p++; + while (isdigit(Py_CHARMASK(*p))) + p++; - if (strncmp(p, decimal_point, decimal_point_len) == 0) - { - *p = '.'; - p++; - if (decimal_point_len > 1) { - rest_len = strlen(p + (decimal_point_len - 1)); - memmove(p, p + (decimal_point_len - 1), - rest_len); - p[rest_len] = 0; + if (strncmp(p, decimal_point, decimal_point_len) == 0) { + *p = '.'; + p++; + if (decimal_point_len > 1) { + rest_len = strlen(p + + (decimal_point_len - 1)); + memmove(p, p + (decimal_point_len - 1), + rest_len); + p[rest_len] = 0; + } + } + } + } + + /* If an exponent exists, ensure that the exponent is at least + MIN_EXPONENT_DIGITS digits, providing the buffer is large enough + for the extra zeros. Also, if there are more than + MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get + back to MIN_EXPONENT_DIGITS */ + p = strpbrk(buffer, "eE"); + if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { + char *start = p + 2; + int exponent_digit_cnt = 0; + int leading_zero_cnt = 0; + int in_leading_zeros = 1; + int significant_digit_cnt; + + p += 2; + while (*p && isdigit(Py_CHARMASK(*p))) { + if (in_leading_zeros && *p == '0') + ++leading_zero_cnt; + if (*p != '0') + in_leading_zeros = 0; + ++p; + ++exponent_digit_cnt; + } + + significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; + if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { + /* If there are 2 exactly digits, we're done, + regardless of what they contain */ + } + else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { + int extra_zeros_cnt; + + /* There are more than 2 digits in the exponent. See + if we can delete some of the leading zeros */ + if (significant_digit_cnt < MIN_EXPONENT_DIGITS) + significant_digit_cnt = MIN_EXPONENT_DIGITS; + extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt; + + /* Delete extra_zeros_cnt worth of characters from the + front of the exponent */ + assert(extra_zeros_cnt >= 0); + + /* Add one to significant_digit_cnt to copy the + trailing 0 byte, thus setting the length */ + memmove(start, + start + extra_zeros_cnt, + significant_digit_cnt + 1); + } + else { + /* If there are fewer than 2 digits, add zeros + until there are 2, if there's enough room */ + int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; + if (start + zeros + exponent_digit_cnt + 1 + < buffer + buf_len) { + memmove(start + zeros, start, + exponent_digit_cnt + 1); + memset(start, '0', zeros); } } } |