diff options
Diffstat (limited to 'Python/formatter_unicode.c')
-rw-r--r-- | Python/formatter_unicode.c | 107 |
1 files changed, 72 insertions, 35 deletions
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 617d58b..a2c2b36 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type) { if (presentation_type > 32 && presentation_type < 128) PyErr_Format(PyExc_ValueError, - "Cannot specify ',' with '%c'.", + "Cannot specify ',' or '_' with '%c'.", (char)presentation_type); else PyErr_Format(PyExc_ValueError, - "Cannot specify ',' with '\\x%x'.", + "Cannot specify ',' or '_' with '\\x%x'.", (unsigned int)presentation_type); } +static void +invalid_comma_and_underscore(void) +{ + PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); +} + /* get_integer consumes 0 or more decimal digit characters from an input string, updates *result with the corresponding positive @@ -48,16 +54,17 @@ invalid_comma_type(Py_UCS4 presentation_type) returns -1 on error. */ static int -get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end, +get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end, Py_ssize_t *result) { - Py_ssize_t accumulator, digitval; + Py_ssize_t accumulator, digitval, pos = *ppos; int numdigits; + int kind = PyUnicode_KIND(str); + void *data = PyUnicode_DATA(str); + accumulator = numdigits = 0; - for (;;(*pos)++, numdigits++) { - if (*pos >= end) - break; - digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos)); + for (; pos < end; pos++, numdigits++) { + digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos)); if (digitval < 0) break; /* @@ -69,10 +76,12 @@ get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end, if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { PyErr_Format(PyExc_ValueError, "Too many decimal digits in format string"); + *ppos = pos; return -1; } accumulator = accumulator * 10 + digitval; } + *ppos = pos; *result = accumulator; return numdigits; } @@ -105,6 +114,14 @@ is_sign_element(Py_UCS4 c) } } +/* Locale type codes. LT_NO_LOCALE must be zero. */ +enum LocaleType { + LT_NO_LOCALE = 0, + LT_DEFAULT_LOCALE, + LT_UNDERSCORE_LOCALE, + LT_UNDER_FOUR_LOCALE, + LT_CURRENT_LOCALE +}; typedef struct { Py_UCS4 fill_char; @@ -112,7 +129,7 @@ typedef struct { int alternate; Py_UCS4 sign; Py_ssize_t width; - int thousands_separators; + enum LocaleType thousands_separators; Py_ssize_t precision; Py_UCS4 type; } InternalFormatSpec; @@ -150,9 +167,11 @@ parse_internal_render_format_spec(PyObject *format_spec, char default_align) { Py_ssize_t pos = start; + int kind = PyUnicode_KIND(format_spec); + void *data = PyUnicode_DATA(format_spec); /* end-pos is used throughout this code to specify the length of the input string */ -#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index) +#define READ_spec(index) PyUnicode_READ(kind, data, index) Py_ssize_t consumed; int align_specified = 0; @@ -163,7 +182,7 @@ parse_internal_render_format_spec(PyObject *format_spec, format->alternate = 0; format->sign = '\0'; format->width = -1; - format->thousands_separators = 0; + format->thousands_separators = LT_NO_LOCALE; format->precision = -1; format->type = default_type; @@ -218,9 +237,22 @@ parse_internal_render_format_spec(PyObject *format_spec, /* Comma signifies add thousands separators */ if (end-pos && READ_spec(pos) == ',') { - format->thousands_separators = 1; + format->thousands_separators = LT_DEFAULT_LOCALE; + ++pos; + } + /* Underscore signifies add thousands separators */ + if (end-pos && READ_spec(pos) == '_') { + if (format->thousands_separators != LT_NO_LOCALE) { + invalid_comma_and_underscore(); + return 0; + } + format->thousands_separators = LT_UNDERSCORE_LOCALE; ++pos; } + if (end-pos && READ_spec(pos) == ',') { + invalid_comma_and_underscore(); + return 0; + } /* Parse field precision */ if (end-pos && READ_spec(pos) == '.') { @@ -270,6 +302,16 @@ parse_internal_render_format_spec(PyObject *format_spec, case '\0': /* These are allowed. See PEP 378.*/ break; + case 'b': + case 'o': + case 'x': + case 'X': + /* Underscores are allowed in bin/oct/hex. See PEP 515. */ + if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { + /* Every four digits, not every three, in bin/oct/hex. */ + format->thousands_separators = LT_UNDER_FOUR_LOCALE; + break; + } default: invalid_comma_type(format->type); return 0; @@ -346,13 +388,6 @@ fill_padding(_PyUnicodeWriter *writer, /*********** common routines for numeric formatting *********************/ /************************************************************************/ -/* Locale type codes. */ -enum LocaleType { - LT_CURRENT_LOCALE, - LT_DEFAULT_LOCALE, - LT_NO_LOCALE -}; - /* Locale info needed for formatting integers and the part of floats before and including the decimal. Note that locales only support 8-bit chars, not unicode. */ @@ -404,13 +439,15 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, Py_ssize_t *n_remainder, int *has_decimal) { Py_ssize_t remainder; + int kind = PyUnicode_KIND(s); + void *data = PyUnicode_DATA(s); - while (pos<end && Py_ISDIGIT(PyUnicode_READ_CHAR(s, pos))) + while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) ++pos; remainder = pos; /* Does remainder start with a decimal point? */ - *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.'; + *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.'; /* Skip the decimal point. */ if (*has_decimal) @@ -658,12 +695,12 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, return 0; } -static char no_grouping[1] = {CHAR_MAX}; +static const char no_grouping[1] = {CHAR_MAX}; /* Find the decimal point character(s?), thousands_separator(s?), and grouping description, either for the current locale if type is - LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or - none if LT_NO_LOCALE. */ + LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or + LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ static int get_locale_info(enum LocaleType type, LocaleInfo *locale_info) { @@ -684,13 +721,19 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) break; } case LT_DEFAULT_LOCALE: + case LT_UNDERSCORE_LOCALE: + case LT_UNDER_FOUR_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.'); - locale_info->thousands_sep = PyUnicode_FromOrdinal(','); + locale_info->thousands_sep = PyUnicode_FromOrdinal( + type == LT_DEFAULT_LOCALE ? ',' : '_'); if (!locale_info->decimal_point || !locale_info->thousands_sep) return -1; - locale_info->grouping = "\3"; /* Group every 3 characters. The + if (type != LT_UNDER_FOUR_LOCALE) + locale_info->grouping = "\3"; /* Group every 3 characters. The (implicit) trailing 0 means repeat infinitely. */ + else + locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */ break; case LT_NO_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.'); @@ -937,9 +980,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done; @@ -1084,9 +1125,7 @@ format_float_internal(PyObject *value, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done; @@ -1262,9 +1301,7 @@ format_complex_internal(PyObject *value, /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done; |