summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Smith <eric@trueblade.com> 2008-04-30 02:12:09 (GMT)
committer: Eric Smith <eric@trueblade.com> 2008-04-30 02:12:09 (GMT)
commit: b2c7af82211ac32295b9419f359036ccb4e819a7 (patch)
tree: b9287cf4c62ffc5e4d511e2b713e1b1c8d6ce632
parent: c14bb758b2f4111ab4f095d8fe2b6981bd05b185 (diff)
downloadcpython-b2c7af82211ac32295b9419f359036ccb4e819a7.zip
cpython-b2c7af82211ac32295b9419f359036ccb4e819a7.tar.gz
cpython-b2c7af82211ac32295b9419f359036ccb4e819a7.tar.bz2
Merged revisions 62586 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r62586 | eric.smith | 2008-04-29 21:09:30 -0400 (Tue, 29 Apr 2008) | 5 lines Issue 2526, float.__format__ 'n' specifier does not support thousands grouping. Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions. ........
-rw-r--r-- Lib/test/test_types.py 12
-rw-r--r-- Python/pystrtod.c 357
2 files changed, 240 insertions, 129 deletions
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
index dae250e..1c7a8cd 100644
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -1,8 +1,9 @@
# Python test set -- part 6, built-in types
-from test.test_support import run_unittest
+from test.test_support import run_unittest, run_with_locale
import unittest
import sys
+import locale
class TypesTests(unittest.TestCase):
@@ -407,6 +408,15 @@ class TypesTests(unittest.TestCase):
self.assertEqual(value.__format__(format_spec),
float(value).__format__(format_spec))
+ @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
+ def test_float__format__locale(self):
+ # test locale support for __format__ code 'n': for x = 1234567890.0 * 10**i with i in range(-10, 10), format(x, 'n') and format(x, '.10n') must equal locale.format('%g' / '%.10g', x, grouping=True)
+
+ for i in range(-10, 10):
+ x = 1234567890.0 * (10.0 ** i)
+ self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
+ self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
+
def test_float__format__(self):
# these should be rewritten to use both format(x, spec) and
# x.__format__(spec)
diff --git a/Python/pystrtod.c b/Python/pystrtod.c
index 2ca8402..0912cec 100644
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val;
}
+/* Normalize the decimal point in buffer.  If the current locale's
+   decimal point is anything other than a single '.', look for it
+   right after an optional sign and a run of digits and replace it
+   with '.'.  The result is never longer than the input, so no
+   maximum buffer size parameter is needed. */
+Py_LOCAL_INLINE(void)
+change_decimal_from_locale_to_dot(char* buffer)
+{
+    const char *dp = localeconv()->decimal_point;
+    size_t dp_len;
+    char *cursor = buffer;
+
+    /* The locale already uses ".": nothing to convert. */
+    if (dp[0] == '.' && dp[1] == 0)
+        return;
+
+    dp_len = strlen(dp);
+
+    /* Step over an optional sign and the integer digits. */
+    if (*cursor == '+' || *cursor == '-')
+        cursor++;
+    while (isdigit(Py_CHARMASK(*cursor)))
+        cursor++;
+
+    /* If the locale's decimal point is not what follows the
+       digits, there is nothing to replace. */
+    if (strncmp(cursor, dp, dp_len) != 0)
+        return;
+
+    *cursor = '.';
+    cursor++;
+    if (dp_len > 1) {
+        /* Multi-byte decimal point: slide the rest of the string
+           left over its remaining bytes and re-terminate. */
+        char *tail = cursor + (dp_len - 1);
+        size_t tail_len = strlen(tail);
+
+        memmove(cursor, tail, tail_len);
+        cursor[tail_len] = 0;
+    }
+}
+
/* From the C99 standard, section 7.19.6:
The exponent always contains at least two digits, and only as many more digits
@@ -194,6 +226,189 @@ as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2
+/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
+ in length.
+
+ buffer is modified in place. buf_size is the total size of buffer and
+ is only consulted when zeros have to be inserted. Nothing is done
+ unless an 'e' or 'E' immediately followed by '+' or '-' is found.
+ Exponents longer than MIN_EXPONENT_DIGITS lose leading zeros, but
+ never drop below MIN_EXPONENT_DIGITS digits. */
+Py_LOCAL_INLINE(void)
+ensure_minumim_exponent_length(char* buffer, size_t buf_size)
+{
+ char *p = strpbrk(buffer, "eE");
+ if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
+ char *start = p + 2;
+ int exponent_digit_cnt = 0;
+ int leading_zero_cnt = 0;
+ int in_leading_zeros = 1;
+ int significant_digit_cnt;
+
+ /* Skip over the exponent character ('e' or 'E') and the sign,
+ so that p (like start) points at the first exponent digit. */
+ p += 2;
+
+ /* Find the end of the exponent, keeping track of leading
+ zeros. */
+ while (*p && isdigit(Py_CHARMASK(*p))) {
+ if (in_leading_zeros && *p == '0')
+ ++leading_zero_cnt;
+ if (*p != '0')
+ in_leading_zeros = 0;
+ ++p;
+ ++exponent_digit_cnt;
+ }
+
+ significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
+ if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
+ /* If there are exactly 2 digits, we're done,
+ regardless of what they contain */
+ }
+ else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
+ int extra_zeros_cnt;
+
+ /* There are more than 2 digits in the exponent. See
+ if we can delete some of the leading zeros, while
+ always keeping at least MIN_EXPONENT_DIGITS digits */
+ if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
+ significant_digit_cnt = MIN_EXPONENT_DIGITS;
+ extra_zeros_cnt = exponent_digit_cnt -
+ significant_digit_cnt;
+
+ /* Delete extra_zeros_cnt worth of characters from the
+ front of the exponent */
+ assert(extra_zeros_cnt >= 0);
+
+ /* Add one to significant_digit_cnt to copy the
+ trailing 0 byte, thus setting the length.
+ NOTE(review): this assumes the exponent digits run
+ to the end of the string -- confirm for all callers. */
+ memmove(start,
+ start + extra_zeros_cnt,
+ significant_digit_cnt + 1);
+ }
+ else {
+ /* If there are fewer than 2 digits, add zeros
+ until there are 2, if there's enough room;
+ otherwise the exponent is left as it is */
+ int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
+ if (start + zeros + exponent_digit_cnt + 1
+ < buffer + buf_size) {
+ memmove(start + zeros, start,
+ exponent_digit_cnt + 1);
+ memset(start, '0', zeros);
+ }
+ }
+ }
+}
+
+/* Ensure that buffer has a decimal point in it, followed by at least
+   one digit.  The decimal point will not be in the current locale,
+   it will always be '.'.
+
+   buffer is modified in place; buf_size is the total size of buffer.
+   A leading '+' or '-' is skipped before looking for the end of the
+   integer digits, so "-1" becomes "-1.0" (not ".0-1").  If there is
+   not enough room for the additional text, buffer is left unchanged:
+   it's not worth generating an error over. */
+Py_LOCAL_INLINE(void)
+ensure_decimal_point(char* buffer, size_t buf_size)
+{
+    int insert_count = 0;
+    const char *chars_to_insert = NULL;
+    size_t buf_len;
+    char *p = buffer;
+
+    /* Skip a leading sign so the insertion point lands after the
+       digits rather than in front of the sign. */
+    if (*p == '-' || *p == '+')
+        ++p;
+
+    /* search for the first non-digit character */
+    while (*p && isdigit(Py_CHARMASK(*p)))
+        ++p;
+
+    if (*p == '.') {
+        if (!isdigit(Py_CHARMASK(*(p+1)))) {
+            /* We have a decimal point, but no following
+               digit.  Insert a zero after the decimal. */
+            ++p;
+            chars_to_insert = "0";
+            insert_count = 1;
+        }
+        /* else: we already have a decimal point and a digit
+           after it, nothing to do */
+    }
+    else {
+        chars_to_insert = ".0";
+        insert_count = 2;
+    }
+
+    if (insert_count == 0)
+        return;
+
+    buf_len = strlen(buffer);
+    if (buf_len + insert_count + 1 >= buf_size)
+        /* Not enough room in the buffer for the additional
+           text, just skip it. */
+        return;
+
+    /* Open a gap at p (moving the trailing 0 byte as well) and
+       fill it with the text to insert. */
+    memmove(p + insert_count, p, buffer + buf_len - p + 1);
+    memcpy(p, chars_to_insert, insert_count);
+}
+
+/* Add the locale specific grouping characters to buffer.  Note
+   that any decimal point (if it's present) in buffer is already
+   locale-specific.  Return 0 on error, else 1.
+
+   buffer is modified in place; buf_size is the total size of buffer.
+   Only the digits to the left of the decimal point (or of the
+   exponent, or the whole string if neither is present) are grouped.
+   A leading '+' or '-' is not counted as a digit, so no separator is
+   ever placed directly after the sign.  The only error is running
+   out of room in buffer; buffer is still a valid string then, but
+   may be only partially grouped. */
+Py_LOCAL_INLINE(int)
+add_thousands_grouping(char* buffer, size_t buf_size)
+{
+    struct lconv *locale_data = localeconv();
+    const char *grouping = locale_data->grouping;
+    const char *thousands_sep = locale_data->thousands_sep;
+    size_t thousands_sep_len = strlen(thousands_sep);
+    const char *decimal_point = locale_data->decimal_point;
+    char *pend = buffer + strlen(buffer); /* current end of buffer */
+    char *pmax = buffer + buf_size;       /* max of buffer */
+    char *pstart = buffer;                /* first char to group */
+    char current_grouping;
+    char *p;
+
+    /* Skip a leading sign: it is not part of the digits. */
+    if (*pstart == '-' || *pstart == '+')
+        pstart++;
+
+    /* Find the decimal point, if any.  We're only concerned
+       about the characters to the left of the decimal when
+       adding grouping. */
+    p = strstr(buffer, decimal_point);
+    if (!p) {
+        /* No decimal.  If there is an exponent, stop there. */
+        p = strpbrk(buffer, "eE");
+        if (!p)
+            /* No exponent and no decimal.  Use the entire
+               string. */
+            p = pend;
+    }
+    /* At this point, p points just past the right-most character we
+       want to format.  We need to add the grouping string for the
+       characters between pstart and p. */
+
+    /* Starting at p and working right-to-left, keep track of
+       what grouping needs to be added and insert that. */
+    current_grouping = *grouping++;
+
+    /* If the first character is 0 (or CHAR_MAX, which means "no
+       further grouping"), perform no grouping at all. */
+    if (current_grouping == 0 || current_grouping == CHAR_MAX)
+        return 1;
+
+    while (p - pstart > current_grouping) {
+        /* Always leave buffer and pend valid at the end of this
+           loop, since we might leave with a return statement. */
+
+        /* Is there room to insert thousands_sep_len chars
+           (while keeping the trailing 0 byte)? */
+        if ((size_t)(pmax - pend) <= thousands_sep_len)
+            /* No room. */
+            return 0;
+
+        /* Move the rest of the string down, including the
+           trailing 0 byte. */
+        p -= current_grouping;
+        memmove(p + thousands_sep_len, p, pend - p + 1);
+        /* Adjust end pointer. */
+        pend += thousands_sep_len;
+        /* Copy the thousands_sep chars into the buffer. */
+        memcpy(p, thousands_sep, thousands_sep_len);
+
+        /* Move to the next grouping character, unless we're
+           repeating (which is designated by a grouping of 0). */
+        if (*grouping != 0) {
+            current_grouping = *grouping++;
+            if (current_grouping == CHAR_MAX)
+                /* We're done. */
+                return 1;
+        }
+    }
+    return 1;
+}
+
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
@@ -222,7 +437,6 @@ PyOS_ascii_formatd(char *buffer,
const char *format,
double d)
{
- char *p;
char format_char;
size_t format_len = strlen(format);
@@ -277,144 +491,31 @@ PyOS_ascii_formatd(char *buffer,
/* Have PyOS_snprintf do the hard work */
PyOS_snprintf(buffer, buf_size, format, d);
- /* Get the current local, and find the decimal point character (or
- string?). Convert that string back to a dot. Do not do this if
- using the 'n' (number) format code. */
- if (format_char != 'n') {
- struct lconv *locale_data = localeconv();
- const char *decimal_point = locale_data->decimal_point;
- size_t decimal_point_len = strlen(decimal_point);
- size_t rest_len;
-
- assert(decimal_point_len != 0);
+ /* Do various fixups on the return string */
- if (decimal_point[0] != '.' || decimal_point[1] != 0) {
- p = buffer;
-
- if (*p == '+' || *p == '-')
- p++;
-
- while (isdigit(Py_CHARMASK(*p)))
- p++;
-
- if (strncmp(p, decimal_point,
- decimal_point_len) == 0) {
- *p = '.';
- p++;
- if (decimal_point_len > 1) {
- rest_len = strlen(p +
- (decimal_point_len - 1));
- memmove(p, p + (decimal_point_len - 1),
- rest_len);
- p[rest_len] = 0;
- }
- }
- }
- }
+ /* Get the current locale, and find the decimal point string.
+ Convert that string back to a dot. Do not do this if using the
+ 'n' (number) format code, since we want to keep the localized
+ decimal point in that case. */
+ if (format_char != 'n')
+ change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
for the extra zeros. Also, if there are more than
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
back to MIN_EXPONENT_DIGITS */
- p = strpbrk(buffer, "eE");
- if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
- char *start = p + 2;
- int exponent_digit_cnt = 0;
- int leading_zero_cnt = 0;
- int in_leading_zeros = 1;
- int significant_digit_cnt;
-
- p += 2;
- while (*p && isdigit(Py_CHARMASK(*p))) {
- if (in_leading_zeros && *p == '0')
- ++leading_zero_cnt;
- if (*p != '0')
- in_leading_zeros = 0;
- ++p;
- ++exponent_digit_cnt;
- }
-
- significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
- if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
- /* If there are 2 exactly digits, we're done,
- regardless of what they contain */
- }
- else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
- int extra_zeros_cnt;
-
- /* There are more than 2 digits in the exponent. See
- if we can delete some of the leading zeros */
- if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
- significant_digit_cnt = MIN_EXPONENT_DIGITS;
- extra_zeros_cnt = exponent_digit_cnt -
- significant_digit_cnt;
-
- /* Delete extra_zeros_cnt worth of characters from the
- front of the exponent */
- assert(extra_zeros_cnt >= 0);
-
- /* Add one to significant_digit_cnt to copy the
- trailing 0 byte, thus setting the length */
- memmove(start,
- start + extra_zeros_cnt,
- significant_digit_cnt + 1);
- }
- else {
- /* If there are fewer than 2 digits, add zeros
- until there are 2, if there's enough room */
- int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
- if (start + zeros + exponent_digit_cnt + 1
- < buffer + buf_size) {
- memmove(start + zeros, start,
- exponent_digit_cnt + 1);
- memset(start, '0', zeros);
- }
- }
- }
+ ensure_minumim_exponent_length(buffer, buf_size);
/* If format_char is 'Z', make sure we have at least one character
after the decimal point (and make sure we have a decimal point). */
- if (format_char == 'Z') {
- int insert_count = 0;
- char* chars_to_insert;
-
- /* search for the first non-digit character */
- p = buffer;
- while (*p && isdigit(Py_CHARMASK(*p)))
- ++p;
+ if (format_char == 'Z')
+ ensure_decimal_point(buffer, buf_size);
- if (*p == '.') {
- if (isdigit(Py_CHARMASK(*(p+1)))) {
- /* Nothing to do, we already have a decimal
- point and a digit after it */
- }
- else {
- /* We have a decimal point, but no following
- digit. Insert a zero after the decimal. */
- ++p;
- chars_to_insert = "0";
- insert_count = 1;
- }
- }
- else {
- chars_to_insert = ".0";
- insert_count = 2;
- }
- if (insert_count) {
- size_t buf_len = strlen(buffer);
- if (buf_len + insert_count + 1 >= buf_size) {
- /* If there is not enough room in the buffer
- for the additional text, just skip it. It's
- not worth generating an error over. */
- }
- else {
- memmove(p + insert_count, p,
- buffer + strlen(buffer) - p + 1);
- memcpy(p, chars_to_insert, insert_count);
- }
- }
- }
+ /* If format_char is 'n', add the thousands grouping. */
+ if (format_char == 'n')
+ if (!add_thousands_grouping(buffer, buf_size))
+ return NULL;
return buffer;
}