diff options
Diffstat (limited to 'Python/formatter_unicode.c')
| -rw-r--r-- | Python/formatter_unicode.c | 107 | 
1 files changed, 72 insertions, 35 deletions
| diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 617d58b..a2c2b36 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type)  {      if (presentation_type > 32 && presentation_type < 128)          PyErr_Format(PyExc_ValueError, -                     "Cannot specify ',' with '%c'.", +                     "Cannot specify ',' or '_' with '%c'.",                       (char)presentation_type);      else          PyErr_Format(PyExc_ValueError, -                     "Cannot specify ',' with '\\x%x'.", +                     "Cannot specify ',' or '_' with '\\x%x'.",                       (unsigned int)presentation_type);  } +static void +invalid_comma_and_underscore(void) +{ +    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); +} +  /*      get_integer consumes 0 or more decimal digit characters from an      input string, updates *result with the corresponding positive @@ -48,16 +54,17 @@ invalid_comma_type(Py_UCS4 presentation_type)      returns -1 on error.  */  static int -get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end, +get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,                    Py_ssize_t *result)  { -    Py_ssize_t accumulator, digitval; +    Py_ssize_t accumulator, digitval, pos = *ppos;      int numdigits; +    int kind = PyUnicode_KIND(str); +    void *data = PyUnicode_DATA(str); +      accumulator = numdigits = 0; -    for (;;(*pos)++, numdigits++) { -        if (*pos >= end) -            break; -        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str, *pos)); +    for (; pos < end; pos++, numdigits++) { +        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));          if (digitval < 0)              break;          /* @@ -69,10 +76,12 @@ get_integer(PyObject *str, Py_ssize_t *pos, Py_ssize_t end,          if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {              PyErr_Format(PyExc_ValueError,                           "Too many decimal digits in format string"); +            *ppos = pos;              return -1;          }          accumulator = accumulator * 10 + digitval;      } +    *ppos = pos;      *result = accumulator;      return numdigits;  } @@ -105,6 +114,14 @@ is_sign_element(Py_UCS4 c)      }  } +/* Locale type codes. LT_NO_LOCALE must be zero. */ +enum LocaleType { +    LT_NO_LOCALE = 0, +    LT_DEFAULT_LOCALE, +    LT_UNDERSCORE_LOCALE, +    LT_UNDER_FOUR_LOCALE, +    LT_CURRENT_LOCALE +};  typedef struct {      Py_UCS4 fill_char; @@ -112,7 +129,7 @@ typedef struct {      int alternate;      Py_UCS4 sign;      Py_ssize_t width; -    int thousands_separators; +    enum LocaleType thousands_separators;      Py_ssize_t precision;      Py_UCS4 type;  } InternalFormatSpec; @@ -150,9 +167,11 @@ parse_internal_render_format_spec(PyObject *format_spec,                                    char default_align)  {      Py_ssize_t pos = start; +    int kind = PyUnicode_KIND(format_spec); +    void *data = PyUnicode_DATA(format_spec);      /* end-pos is used throughout this code to specify the length of         the input string */ -#define READ_spec(index) PyUnicode_READ_CHAR(format_spec, index) +#define READ_spec(index) PyUnicode_READ(kind, data, index)      Py_ssize_t consumed;      int align_specified = 0; @@ -163,7 +182,7 @@ parse_internal_render_format_spec(PyObject *format_spec,      format->alternate = 0;      format->sign = '\0';      format->width = -1; -    format->thousands_separators = 0; +    format->thousands_separators = LT_NO_LOCALE;      format->precision = -1;      format->type = default_type; @@ -218,9 +237,22 @@ parse_internal_render_format_spec(PyObject *format_spec,      /* Comma signifies add thousands separators */      if (end-pos && READ_spec(pos) == ',') { -        format->thousands_separators = 1; +        format->thousands_separators = LT_DEFAULT_LOCALE; +        ++pos; +    } +    /* Underscore signifies add thousands separators */ +    if (end-pos && READ_spec(pos) == '_') { +        if (format->thousands_separators != LT_NO_LOCALE) { +            invalid_comma_and_underscore(); +            return 0; +        } +        format->thousands_separators = LT_UNDERSCORE_LOCALE;          ++pos;      } +    if (end-pos && READ_spec(pos) == ',') { +        invalid_comma_and_underscore(); +        return 0; +    }      /* Parse field precision */      if (end-pos && READ_spec(pos) == '.') { @@ -270,6 +302,16 @@ parse_internal_render_format_spec(PyObject *format_spec,          case '\0':              /* These are allowed. See PEP 378.*/              break; +        case 'b': +        case 'o': +        case 'x': +        case 'X': +            /* Underscores are allowed in bin/oct/hex. See PEP 515. */ +            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { +                /* Every four digits, not every three, in bin/oct/hex. */ +                format->thousands_separators = LT_UNDER_FOUR_LOCALE; +                break; +            }          default:              invalid_comma_type(format->type);              return 0; @@ -346,13 +388,6 @@ fill_padding(_PyUnicodeWriter *writer,  /*********** common routines for numeric formatting *********************/  /************************************************************************/ -/* Locale type codes. */ -enum LocaleType { -    LT_CURRENT_LOCALE, -    LT_DEFAULT_LOCALE, -    LT_NO_LOCALE -}; -  /* Locale info needed for formatting integers and the part of floats     before and including the decimal. Note that locales only support     8-bit chars, not unicode. */ @@ -404,13 +439,15 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,               Py_ssize_t *n_remainder, int *has_decimal)  {      Py_ssize_t remainder; +    int kind = PyUnicode_KIND(s); +    void *data = PyUnicode_DATA(s); -    while (pos<end && Py_ISDIGIT(PyUnicode_READ_CHAR(s, pos))) +    while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))          ++pos;      remainder = pos;      /* Does remainder start with a decimal point? */ -    *has_decimal = pos<end && PyUnicode_READ_CHAR(s, remainder) == '.'; +    *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';      /* Skip the decimal point. */      if (*has_decimal) @@ -658,12 +695,12 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,      return 0;  } -static char no_grouping[1] = {CHAR_MAX}; +static const char no_grouping[1] = {CHAR_MAX};  /* Find the decimal point character(s?), thousands_separator(s?), and     grouping description, either for the current locale if type is -   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or -   none if LT_NO_LOCALE. */ +   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or +   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */  static int  get_locale_info(enum LocaleType type, LocaleInfo *locale_info)  { @@ -684,13 +721,19 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)          break;      }      case LT_DEFAULT_LOCALE: +    case LT_UNDERSCORE_LOCALE: +    case LT_UNDER_FOUR_LOCALE:          locale_info->decimal_point = PyUnicode_FromOrdinal('.'); -        locale_info->thousands_sep = PyUnicode_FromOrdinal(','); +        locale_info->thousands_sep = PyUnicode_FromOrdinal( +            type == LT_DEFAULT_LOCALE ? ',' : '_');          if (!locale_info->decimal_point || !locale_info->thousands_sep)              return -1; -        locale_info->grouping = "\3"; /* Group every 3 characters.  The +        if (type != LT_UNDER_FOUR_LOCALE) +            locale_info->grouping = "\3"; /* Group every 3 characters.  The                                           (implicit) trailing 0 means repeat                                           infinitely. */ +        else +            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */          break;      case LT_NO_LOCALE:          locale_info->decimal_point = PyUnicode_FromOrdinal('.'); @@ -937,9 +980,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,      /* Determine the grouping, separator, and decimal point, if any. */      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : -                        (format->thousands_separators ? -                         LT_DEFAULT_LOCALE : -                         LT_NO_LOCALE), +                        format->thousands_separators,                          &locale) == -1)          goto done; @@ -1084,9 +1125,7 @@ format_float_internal(PyObject *value,      /* Determine the grouping, separator, and decimal point, if any. */      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : -                        (format->thousands_separators ? -                         LT_DEFAULT_LOCALE : -                         LT_NO_LOCALE), +                        format->thousands_separators,                          &locale) == -1)          goto done; @@ -1262,9 +1301,7 @@ format_complex_internal(PyObject *value,      /* Determine the grouping, separator, and decimal point, if any. */      if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : -                        (format->thousands_separators ? -                         LT_DEFAULT_LOCALE : -                         LT_NO_LOCALE), +                        format->thousands_separators,                          &locale) == -1)          goto done; | 
