diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2012-02-23 23:37:51 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2012-02-23 23:37:51 (GMT) |
commit | 41a863cb81608c779d60b49e7be8a115816734fc (patch) | |
tree | b59ae94894190d8863ffd2081122db4fe819e003 /Python | |
parent | dcb30cf959902fdc3da027c13a99e091d116c273 (diff) | |
download | cpython-41a863cb81608c779d60b49e7be8a115816734fc.zip cpython-41a863cb81608c779d60b49e7be8a115816734fc.tar.gz cpython-41a863cb81608c779d60b49e7be8a115816734fc.tar.bz2 |
Issue #13706: Fix format(int, "n") for locale with non-ASCII thousands separator
* Decode thousands separator and decimal point using PyUnicode_DecodeLocale()
(from the locale encoding), instead of decoding them implicitly from latin1
* Remove _PyUnicode_InsertThousandsGroupingLocale(), it was not used
* Change _PyUnicode_InsertThousandsGrouping() API to return the maximum
character if unicode is NULL
* Replace MIN/MAX macros by Py_MIN/Py_MAX
* stringlib/undef.h undefines STRINGLIB_IS_UNICODE
* stringlib/localeutil.h only supports Unicode
Diffstat (limited to 'Python')
-rw-r--r-- | Python/formatter_unicode.c | 142 |
1 files changed, 89 insertions, 53 deletions
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index ed716a5..94f8047 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -346,11 +346,13 @@ fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, before and including the decimal. Note that locales only support 8-bit chars, not unicode. */ typedef struct { - char *decimal_point; - char *thousands_sep; - char *grouping; + PyObject *decimal_point; + PyObject *thousands_sep; + const char *grouping; } LocaleInfo; +#define STATIC_LOCALE_INFO_INIT {0, 0, 0} + /* describes the layout for an integer, see the comment in calc_number_widths() for details */ typedef struct { @@ -415,7 +417,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, Py_ssize_t n_end, Py_ssize_t n_remainder, int has_decimal, const LocaleInfo *locale, - const InternalFormatSpec *format) + const InternalFormatSpec *format, Py_UCS4 *maxchar) { Py_ssize_t n_non_digit_non_padding; Py_ssize_t n_padding; @@ -423,7 +425,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; spec->n_prefix = n_prefix; - spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0; + spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0; spec->n_remainder = n_remainder; spec->n_spadding = 0; spec->n_rpadding = 0; @@ -484,11 +486,15 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, to special case it because the grouping code always wants to have at least one character. */ spec->n_grouped_digits = 0; - else + else { + Py_UCS4 grouping_maxchar; spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( - NULL, PyUnicode_1BYTE_KIND, NULL, 0, NULL, + NULL, 0, + 0, NULL, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep); + locale->grouping, locale->thousands_sep, &grouping_maxchar); + *maxchar = Py_MAX(*maxchar, grouping_maxchar); + } /* Given the desired width and the total of digit and non-digit space we consume, see if we need any padding. format->width can @@ -519,6 +525,10 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, break; } } + + if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding) + *maxchar = Py_MAX(*maxchar, format->fill_char); + return spec->n_lpadding + spec->n_sign + spec->n_prefix + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + spec->n_remainder + spec->n_rpadding; @@ -587,12 +597,11 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, r = #endif _PyUnicode_InsertThousandsGrouping( - out, kind, - (char*)data + kind * pos, + out, pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, - locale->grouping, locale->thousands_sep); + locale->grouping, locale->thousands_sep, NULL); #ifndef NDEBUG assert(r == spec->n_grouped_digits); #endif @@ -615,10 +624,8 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, pos += spec->n_grouped_digits; if (spec->n_decimal) { - Py_ssize_t t; - for (t = 0; t < spec->n_decimal; ++t) - PyUnicode_WRITE(kind, data, pos + t, - locale->decimal_point[t]); + if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) + return -1; pos += spec->n_decimal; d_pos += 1; } @@ -643,32 +650,60 @@ static char no_grouping[1] = {CHAR_MAX}; grouping description, either for the current locale if type is LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or none if LT_NO_LOCALE. */ -static void +static int get_locale_info(int type, LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { struct lconv *locale_data = localeconv(); - locale_info->decimal_point = locale_data->decimal_point; - locale_info->thousands_sep = locale_data->thousands_sep; + locale_info->decimal_point = PyUnicode_DecodeLocale( + locale_data->decimal_point, + NULL); + if (locale_info->decimal_point == NULL) + return -1; + locale_info->thousands_sep = PyUnicode_DecodeLocale( + locale_data->thousands_sep, + NULL); + if (locale_info->thousands_sep == NULL) { + Py_DECREF(locale_info->decimal_point); + return -1; + } locale_info->grouping = locale_data->grouping; break; } case LT_DEFAULT_LOCALE: - locale_info->decimal_point = "."; - locale_info->thousands_sep = ","; + locale_info->decimal_point = PyUnicode_FromOrdinal('.'); + locale_info->thousands_sep = PyUnicode_FromOrdinal(','); + if (!locale_info->decimal_point || !locale_info->thousands_sep) { + Py_XDECREF(locale_info->decimal_point); + Py_XDECREF(locale_info->thousands_sep); + return -1; + } locale_info->grouping = "\3"; /* Group every 3 characters. The (implicit) trailing 0 means repeat infinitely. */ break; case LT_NO_LOCALE: - locale_info->decimal_point = "."; - locale_info->thousands_sep = ""; + locale_info->decimal_point = PyUnicode_FromOrdinal('.'); + locale_info->thousands_sep = PyUnicode_New(0, 0); + if (!locale_info->decimal_point || !locale_info->thousands_sep) { + Py_XDECREF(locale_info->decimal_point); + Py_XDECREF(locale_info->thousands_sep); + return -1; + } locale_info->grouping = no_grouping; break; default: assert(0); } + return 0; +} + +static void +free_locale_info(LocaleInfo *locale_info) +{ + Py_XDECREF(locale_info->decimal_point); + Py_XDECREF(locale_info->thousands_sep); } /************************************************************************/ @@ -769,7 +804,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale; + LocaleInfo locale = STATIC_LOCALE_INFO_INIT; /* no precision allowed on integers */ if (format->precision != -1) { @@ -868,18 +903,17 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, } /* Determine the grouping, separator, and decimal point, if any. */ - get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), - &locale); + if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale) == -1) + goto done; /* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, - inumeric_chars + n_digits, n_remainder, 0, &locale, format); - - if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding) - maxchar = Py_MAX(maxchar, format->fill_char); + inumeric_chars + n_digits, n_remainder, 0, + &locale, format, &maxchar); /* Allocate the memory. */ result = PyUnicode_New(n_total, maxchar); @@ -897,6 +931,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, done: Py_XDECREF(tmp); + free_locale_info(&locale); assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -938,7 +973,7 @@ format_float_internal(PyObject *value, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale; + LocaleInfo locale = STATIC_LOCALE_INFO_INIT; if (format->alternate) flags |= Py_DTSF_ALT; @@ -1009,19 +1044,17 @@ format_float_internal(PyObject *value, parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ - get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), - &locale); + if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale) == -1) + goto done; /* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, index + n_digits, n_remainder, has_decimal, - &locale, format); - - if (spec.n_lpadding || spec.n_spadding || spec.n_rpadding) - maxchar = Py_MAX(maxchar, format->fill_char); + &locale, format, &maxchar); /* Allocate the memory. */ result = PyUnicode_New(n_total, maxchar); @@ -1040,6 +1073,7 @@ format_float_internal(PyObject *value, done: PyMem_Free(buf); Py_DECREF(unicode_tmp); + free_locale_info(&locale); assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -1094,7 +1128,7 @@ format_complex_internal(PyObject *value, /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ - LocaleInfo locale; + LocaleInfo locale = STATIC_LOCALE_INFO_INIT; /* Zero padding is not allowed. */ if (format->fill_char == '0') { @@ -1190,11 +1224,12 @@ format_complex_internal(PyObject *value, &n_im_remainder, &im_has_decimal); /* Determine the grouping, separator, and decimal point, if any. */ - get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), - &locale); + if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale) == -1) + goto done; /* Turn off any padding. We'll do it later after we've composed the numbers without padding. */ @@ -1205,7 +1240,8 @@ format_complex_internal(PyObject *value, /* Calculate how much memory we'll need. */ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp, i_re, i_re + n_re_digits, n_re_remainder, - re_has_decimal, &locale, &tmp_format); + re_has_decimal, &locale, &tmp_format, + &maxchar); /* Same formatting, but always include a sign, unless the real part is * going to be omitted, in which case we use whatever sign convention was @@ -1214,7 +1250,8 @@ format_complex_internal(PyObject *value, tmp_format.sign = '+'; n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp, i_im, i_im + n_im_digits, n_im_remainder, - im_has_decimal, &locale, &tmp_format); + im_has_decimal, &locale, &tmp_format, + &maxchar); if (skip_re) n_re_total = 0; @@ -1223,9 +1260,7 @@ format_complex_internal(PyObject *value, calc_padding(n_re_total + n_im_total + 1 + add_parens * 2, format->width, format->align, &lpad, &rpad, &total); - if (re_spec.n_lpadding || re_spec.n_spadding || re_spec.n_rpadding - || im_spec.n_lpadding || im_spec.n_spadding || im_spec.n_rpadding - || lpad || rpad) + if (lpad || rpad) maxchar = Py_MAX(maxchar, format->fill_char); result = PyUnicode_New(total, maxchar); @@ -1275,6 +1310,7 @@ done: PyMem_Free(im_buf); Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp); + free_locale_info(&locale); assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } |