From 0a95063d732b6f309a0cbf73e6095fb8774e5914 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Wed, 30 Apr 2008 01:09:30 +0000 Subject: Issue 2526, float.__format__ 'n' specifier does not support thousands grouping. Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions. --- Lib/test/test_types.py | 12 +- Python/pystrtod.c | 357 +++++++++++++++++++++++++++++++------------------ 2 files changed, 240 insertions(+), 129 deletions(-) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 4fa2c01..4b620c5 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -1,8 +1,9 @@ # Python test set -- part 6, built-in types -from test.test_support import run_unittest, have_unicode +from test.test_support import run_unittest, have_unicode, run_with_locale import unittest import sys +import locale class TypesTests(unittest.TestCase): @@ -476,6 +477,15 @@ class TypesTests(unittest.TestCase): self.assertEqual(value.__format__(format_spec), float(value).__format__(format_spec)) + @run_with_locale('LC_NUMERIC', 'en_US.UTF8') + def test_float__format__locale(self): + # test locale support for __format__ code 'n' + + for i in range(-10, 10): + x = 1234567890.0 * (10.0 ** i) + self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n')) + self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n')) + def test_float__format__(self): # these should be rewritten to use both format(x, spec) and # x.__format__(spec) diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 2ca8402..0912cec 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) return val; } +/* Given a string that may have a decimal point in the current + locale, change it back to a dot. Since the string cannot get + longer, no need for a maximum buffer size parameter. */ +Py_LOCAL_INLINE(void) +change_decimal_from_locale_to_dot(char* buffer) +{ + struct lconv *locale_data = localeconv(); + const char *decimal_point = locale_data->decimal_point; + + if (decimal_point[0] != '.' || decimal_point[1] != 0) { + size_t decimal_point_len = strlen(decimal_point); + + if (*buffer == '+' || *buffer == '-') + buffer++; + while (isdigit(Py_CHARMASK(*buffer))) + buffer++; + if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { + *buffer = '.'; + buffer++; + if (decimal_point_len > 1) { + /* buffer needs to get smaller */ + size_t rest_len = strlen(buffer + + (decimal_point_len - 1)); + memmove(buffer, + buffer + (decimal_point_len - 1), + rest_len); + buffer[rest_len] = 0; + } + } + } +} + /* From the C99 standard, section 7.19.6: The exponent always contains at least two digits, and only as many more digits @@ -194,6 +226,189 @@ as necessary to represent the exponent. */ #define MIN_EXPONENT_DIGITS 2 +/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS + in length. */ +Py_LOCAL_INLINE(void) +ensure_minumim_exponent_length(char* buffer, size_t buf_size) +{ + char *p = strpbrk(buffer, "eE"); + if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { + char *start = p + 2; + int exponent_digit_cnt = 0; + int leading_zero_cnt = 0; + int in_leading_zeros = 1; + int significant_digit_cnt; + + /* Skip over the exponent and the sign. */ + p += 2; + + /* Find the end of the exponent, keeping track of leading + zeros. */ + while (*p && isdigit(Py_CHARMASK(*p))) { + if (in_leading_zeros && *p == '0') + ++leading_zero_cnt; + if (*p != '0') + in_leading_zeros = 0; + ++p; + ++exponent_digit_cnt; + } + + significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; + if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { + /* If there are 2 exactly digits, we're done, + regardless of what they contain */ + } + else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { + int extra_zeros_cnt; + + /* There are more than 2 digits in the exponent. See + if we can delete some of the leading zeros */ + if (significant_digit_cnt < MIN_EXPONENT_DIGITS) + significant_digit_cnt = MIN_EXPONENT_DIGITS; + extra_zeros_cnt = exponent_digit_cnt - + significant_digit_cnt; + + /* Delete extra_zeros_cnt worth of characters from the + front of the exponent */ + assert(extra_zeros_cnt >= 0); + + /* Add one to significant_digit_cnt to copy the + trailing 0 byte, thus setting the length */ + memmove(start, + start + extra_zeros_cnt, + significant_digit_cnt + 1); + } + else { + /* If there are fewer than 2 digits, add zeros + until there are 2, if there's enough room */ + int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; + if (start + zeros + exponent_digit_cnt + 1 + < buffer + buf_size) { + memmove(start + zeros, start, + exponent_digit_cnt + 1); + memset(start, '0', zeros); + } + } + } +} + +/* Ensure that buffer has a decimal point in it. The decimal point + will not be in the current locale, it will always be '.' */ +Py_LOCAL_INLINE(void) +ensure_decimal_point(char* buffer, size_t buf_size) +{ + int insert_count = 0; + char* chars_to_insert; + + /* search for the first non-digit character */ + char *p = buffer; + while (*p && isdigit(Py_CHARMASK(*p))) + ++p; + + if (*p == '.') { + if (isdigit(Py_CHARMASK(*(p+1)))) { + /* Nothing to do, we already have a decimal + point and a digit after it */ + } + else { + /* We have a decimal point, but no following + digit. Insert a zero after the decimal. */ + ++p; + chars_to_insert = "0"; + insert_count = 1; + } + } + else { + chars_to_insert = ".0"; + insert_count = 2; + } + if (insert_count) { + size_t buf_len = strlen(buffer); + if (buf_len + insert_count + 1 >= buf_size) { + /* If there is not enough room in the buffer + for the additional text, just skip it. It's + not worth generating an error over. */ + } + else { + memmove(p + insert_count, p, + buffer + strlen(buffer) - p + 1); + memcpy(p, chars_to_insert, insert_count); + } + } +} + +/* Add the locale specific grouping characters to buffer. Note + that any decimal point (if it's present) in buffer is already + locale-specific. Return 0 on error, else 1. */ +Py_LOCAL_INLINE(int) +add_thousands_grouping(char* buffer, size_t buf_size) +{ + struct lconv *locale_data = localeconv(); + const char *grouping = locale_data->grouping; + const char *thousands_sep = locale_data->thousands_sep; + size_t thousands_sep_len = strlen(thousands_sep); + const char *decimal_point = locale_data->decimal_point; + char *pend = buffer + strlen(buffer); /* current end of buffer */ + char *pmax = buffer + buf_size; /* max of buffer */ + char current_grouping; + + /* Find the decimal point, if any. We're only concerned + about the characters to the left of the decimal when + adding grouping. */ + char *p = strstr(buffer, decimal_point); + if (!p) { + /* No decimal, use the entire string. */ + + /* If any exponent, adjust p. */ + p = strpbrk(buffer, "eE"); + if (!p) + /* No exponent and no decimal. Use the entire + string. */ + p = pend; + } + /* At this point, p points just past the right-most character we + want to format. We need to add the grouping string for the + characters between buffer and p. */ + + /* Starting at p and working right-to-left, keep track of + what grouping needs to be added and insert that. */ + current_grouping = *grouping++; + + /* If the first character is 0, perform no grouping at all. */ + if (current_grouping == 0) + return 1; + + while (p - buffer > current_grouping) { + /* Always leave buffer and pend valid at the end of this + loop, since we might leave with a return statement. */ + + /* Is there room to insert thousands_sep_len chars?. */ + if (pmax - pend <= thousands_sep_len) + /* No room. */ + return 0; + + /* Move the rest of the string down. */ + p -= current_grouping; + memmove(p + thousands_sep_len, + p, + pend - p + 1); + /* Adjust end pointer. */ + pend += thousands_sep_len; + /* Copy the thousands_sep chars into the buffer. */ + memcpy(p, thousands_sep, thousands_sep_len); + + /* Move to the next grouping character, unless we're + repeating (which is designated by a grouping of 0). */ + if (*grouping != 0) { + current_grouping = *grouping++; + if (current_grouping == CHAR_MAX) + /* We're done. */ + return 1; + } + } + return 1; +} + /* see FORMATBUFLEN in unicodeobject.c */ #define FLOAT_FORMATBUFLEN 120 @@ -222,7 +437,6 @@ PyOS_ascii_formatd(char *buffer, const char *format, double d) { - char *p; char format_char; size_t format_len = strlen(format); @@ -277,144 +491,31 @@ PyOS_ascii_formatd(char *buffer, /* Have PyOS_snprintf do the hard work */ PyOS_snprintf(buffer, buf_size, format, d); - /* Get the current local, and find the decimal point character (or - string?). Convert that string back to a dot. Do not do this if - using the 'n' (number) format code. */ - if (format_char != 'n') { - struct lconv *locale_data = localeconv(); - const char *decimal_point = locale_data->decimal_point; - size_t decimal_point_len = strlen(decimal_point); - size_t rest_len; - - assert(decimal_point_len != 0); + /* Do various fixups on the return string */ - if (decimal_point[0] != '.' || decimal_point[1] != 0) { - p = buffer; - - if (*p == '+' || *p == '-') - p++; - - while (isdigit(Py_CHARMASK(*p))) - p++; - - if (strncmp(p, decimal_point, - decimal_point_len) == 0) { - *p = '.'; - p++; - if (decimal_point_len > 1) { - rest_len = strlen(p + - (decimal_point_len - 1)); - memmove(p, p + (decimal_point_len - 1), - rest_len); - p[rest_len] = 0; - } - } - } - } + /* Get the current locale, and find the decimal point string. + Convert that string back to a dot. Do not do this if using the + 'n' (number) format code, since we want to keep the localized + decimal point in that case. */ + if (format_char != 'n') + change_decimal_from_locale_to_dot(buffer); /* If an exponent exists, ensure that the exponent is at least MIN_EXPONENT_DIGITS digits, providing the buffer is large enough for the extra zeros. Also, if there are more than MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get back to MIN_EXPONENT_DIGITS */ - p = strpbrk(buffer, "eE"); - if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { - char *start = p + 2; - int exponent_digit_cnt = 0; - int leading_zero_cnt = 0; - int in_leading_zeros = 1; - int significant_digit_cnt; - - p += 2; - while (*p && isdigit(Py_CHARMASK(*p))) { - if (in_leading_zeros && *p == '0') - ++leading_zero_cnt; - if (*p != '0') - in_leading_zeros = 0; - ++p; - ++exponent_digit_cnt; - } - - significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; - if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { - /* If there are 2 exactly digits, we're done, - regardless of what they contain */ - } - else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { - int extra_zeros_cnt; - - /* There are more than 2 digits in the exponent. See - if we can delete some of the leading zeros */ - if (significant_digit_cnt < MIN_EXPONENT_DIGITS) - significant_digit_cnt = MIN_EXPONENT_DIGITS; - extra_zeros_cnt = exponent_digit_cnt - - significant_digit_cnt; - - /* Delete extra_zeros_cnt worth of characters from the - front of the exponent */ - assert(extra_zeros_cnt >= 0); - - /* Add one to significant_digit_cnt to copy the - trailing 0 byte, thus setting the length */ - memmove(start, - start + extra_zeros_cnt, - significant_digit_cnt + 1); - } - else { - /* If there are fewer than 2 digits, add zeros - until there are 2, if there's enough room */ - int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; - if (start + zeros + exponent_digit_cnt + 1 - < buffer + buf_size) { - memmove(start + zeros, start, - exponent_digit_cnt + 1); - memset(start, '0', zeros); - } - } - } + ensure_minumim_exponent_length(buffer, buf_size); /* If format_char is 'Z', make sure we have at least one character after the decimal point (and make sure we have a decimal point). */ - if (format_char == 'Z') { - int insert_count = 0; - char* chars_to_insert; - - /* search for the first non-digit character */ - p = buffer; - while (*p && isdigit(Py_CHARMASK(*p))) - ++p; + if (format_char == 'Z') + ensure_decimal_point(buffer, buf_size); - if (*p == '.') { - if (isdigit(Py_CHARMASK(*(p+1)))) { - /* Nothing to do, we already have a decimal - point and a digit after it */ - } - else { - /* We have a decimal point, but no following - digit. Insert a zero after the decimal. */ - ++p; - chars_to_insert = "0"; - insert_count = 1; - } - } - else { - chars_to_insert = ".0"; - insert_count = 2; - } - if (insert_count) { - size_t buf_len = strlen(buffer); - if (buf_len + insert_count + 1 >= buf_size) { - /* If there is not enough room in the buffer - for the additional text, just skip it. It's - not worth generating an error over. */ - } - else { - memmove(p + insert_count, p, - buffer + strlen(buffer) - p + 1); - memcpy(p, chars_to_insert, insert_count); - } - } - } + /* If format_char is 'n', add the thousands grouping. */ + if (format_char == 'n') + if (!add_thousands_grouping(buffer, buf_size)) + return NULL; return buffer; } -- cgit v0.12